User:Érico/cleanup.js

Note: After saving, you have to bypass your browser's cache to see the changes. Internet Explorer: press Ctrl-F5, Mozilla: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Opera/Konqueror: press F5, Safari: hold down Shift + Alt while clicking Reload, Chrome: hold down Shift while clicking Reload.
Documentation for this user script can be added at User:Érico/cleanup.
Report page listing warnings and errors.
//<nowiki>
// Script for cleanup of file description pages. Script does NOT work with Internet Explorer 8 or lower. Currently known to work with Google Chrome (best functionality), Firefox (usually just as well, but having occasional bugs), Opera, Internet Explorer 9, and probably Safari.
// To use, add this line to your common.js file: importScript('User:Magog the Ogre/cleanup.js');
// mto_cleanup tagger (initially developed by Patstuart/Magog the Ogre)

function bot_move_checked(text)
{
  text= text.replace(/\{\{BotMoveToCommons.*\}\}\s*\n/, '');
  text= text.replace(/\{\{CH2MoveToCommons\|[a-z\-]+.w[a-z]+\|year\=\d+\|month=\w+\|day\=\d\}\}\s*\n/, '');
  text= text.replace('The tool and the bot are operated by [[User:Jan Luca]] and [[User:Magnus Manske]].', '');
  text = text.replace('The upload bot is [[User:CommonsHelper2 Bot]] which is called by [http://toolserver.org/~commonshelper2/index.php CommonsHelper2].', '');
  return text;
}

/**********************
 * Function which removes <nowiki> and <!--...--> from the text, and replaces them with a set of 
 *   temporary strings. This is necessary, because functions which change text will may to leave the 
 *   nowikis/comments intact. The temporary strings function as "markers", so that 
 *   rebuild_comments_nowikis() can later be called and the nowikis/comments will return to the correct 
 *   position, untouched. 
 * Returns: An array, whose 0th element contains the altered string, and whose 1st element contains 
 *   another array which will focus as a set "tokens", which should be kept unaltered and passed to 
 *   rebuild_comments_nowikis() later.
 **********************/
function parse_comments_nowikis (text)
{
  "use strict";

  //constants
  var nw_regex = /<nowiki>((?:.|\n)*?)<\/nowiki>/;
  var c_regex = /<!\-\-((?:.|\n)*?)\-\->/;
  var cs_regex = /<\!\-\-(\s*categories\s*by\s*(?:commonsense|checkusage)\s*)\-\->/i; /* special one for CommonSense.... causing grouping headaches */

  //variables
  var comments = new Array(0);
  var nowikis = new Array(0);
  var comments_cs = new Array(0);
  var nw_com_order = new Array(0);
  while (1)
  {
    var next_nw = text.search(nw_regex);
    var next_c  = text.search(c_regex );
    var next_cs = text.search(cs_regex);
    if (next_nw === -1 && next_c === -1) {break;} /* cs_regex will be -1 if c_regex is -1 */

    var index;
    if (next_nw === -1 || (next_c !==-1 && next_c<next_nw))
    {
      if (next_cs === next_c)
      {
        index = comments_cs.length;
        comments_cs.push(cs_regex.exec(text)[1]);
        text = text.replace(c_regex, "%%%MTOCSCOMMENT"+index+"%%%");
        nw_com_order.push('s');
      }
      else
      {
        index = comments.length;
        comments.push(c_regex.exec(text)[1]);
        text = text.replace(c_regex, "%%%MTOCOMMENT"+index+"%%%");
        nw_com_order.push('c');
      }
    }
    else
    {
      index = nowikis.length;
      nowikis.push(nw_regex.exec(text)[1]);
      text = text.replace(nw_regex, "%%%MTONOWIKI"+index+"%%%");
      nw_com_order.push('n');
    }
  }

  return [text, [comments, nowikis, comments_cs, nw_com_order]];
}

/**********************
 * See function immediately above for explanation
 * remove_whitespace: when rebuilding nowikis, remove unnecessary whitespace (not shown in final version anyway)
 * Returns: Rebuilt text.
 **********************/
function rebuild_comments_nowikis(text, tokens, remove_whitespace)
{
  "use strict";

  var comments = tokens[0];
  var nowikis = tokens[1];
  var comments_cs = tokens[2];
  var nw_com_order = tokens[3];

  while (nw_com_order.length>0)
  {
    var next = nw_com_order.pop();
    if (next==='s')
     {text = text.replace("%%%MTOCSCOMMENT"+(comments_cs.length-1)+"%%%", "<!--"+comments_cs.pop()+"-->");}
    else if (next==='c')
     {text = text.replace("%%%MTOCOMMENT"+(comments.length-1)+"%%%", "<!--"+comments.pop()+"-->");}
    else /* i.e., nw_com_order.pop was 'n'; also, mediawiki registers all whitespace inside nw's as just one space */
    {
      var text_tmp = nowikis.pop();
      if (remove_whitespace) {text_tmp = text_tmp.replace(/\s+/g, ' ');}
      text = text.replace("%%%MTONOWIKI"+(nowikis.length)+"%%%", "<nowiki>"+text_tmp+"</"+"nowiki>");
    }
  }
  return text;
}

/**********************
 * Function which iterates over 'text' and replaces each instance of 'code' with 'replacement' until 
 *   running the iteration no longer results in a change of the text.
 * Parameter code: Should be a string or regular expression (both are allowed by the
 *   replace() function in Javascript).
 * Parameter replacement: Should be a string or a functor (both are allowed by the replace() function
 *   in Javascript).
 * Parameter text: The text that will be altered.
 * Returns: The altered text.
 **********************/
function iterative_replace(code, replacement, text)
{
  "use strict";

  while(1)  //only headers that aren't already inside a template will suffice, per what seems right to me
  {
    var temptext = text.replace(code, replacement);
    if (temptext===text) {break;}
    text=temptext;
  }
  return text;
}

function mto_parse_date(input_string, month_name_re /* case insensitive*/, month_number /* 1-12 */)
{
  "use strict";

  var regexp1 = new RegExp("(^\\s*\\|\\s*[Dd]ate\\s*\\=\\s*)"+month_name_re+"[\\\\\\/\\-\\.\\s\\,]+(\\d{1,2})(?:st|nd|rd|th)?[\\\\\\/\\-\\.\\s\\,]+(\\d{4})(?!\\d)", "mi");
  var regexp2 = new RegExp("(^\\s*\\|\\s*[Dd]ate\\s*\\=\\s*)(\\d{1,2})(?:st|nd|rd|th)?[\\\\\\/\\-\\.\\s\\,]+"+month_name_re+"[\\\\\\/\\-\\.\\s\\,]+(\\d{4})(?!\\d)", "mi");
  var regexp3 = new RegExp("(^\\s*\\|\\s*[Dd]ate\\s*\\=\\s*)(\\d{4})[\\\\\\/\\-\\.\\s\\,]+"+month_name_re+"[\\\\\\/\\-\\.\\s\\,]+(\\d{1,2})(?:st|nd|rd|th)?(?!\\d)", "mi");
  var regexp4 = new RegExp("(^\\s*\\|\\s*[Dd]ate\\s*\\=\\s*)(\\d{4})[\\\\\\/\\-\\.\\s\\,]+(\\d{1,2})(?:st|nd|rd|th)?[\\\\\\/\\-\\.\\s\\,]+"+month_name_re, "mi");
  var regexp5 = new RegExp("(^\\s*\\|\\s*[Dd]ate\\s*\\=\\s*)(\\d{4})[\\\\\\/\\-\\.\\s\\,]+"+month_name_re, "mi");
  var regexp6 = new RegExp("(^\\s*\\|\\s*[Dd]ate\\s*\\=\\s*)"+month_name_re+"[\\\\\\/\\-\\.\\s\\,]+(\\d{4})(?!(?:[\\\\\\/\\-\\.\\s\\,]+\\d{1,2}|\\d))", "mi");

  input_string = input_string.replace(regexp1, '$1{{Date|$3|'+month_number+'|$2}}');
  input_string = input_string.replace(regexp2, '$1{{Date|$3|'+month_number+'|$2}}');
  input_string = input_string.replace(regexp3, '$1{{Date|$2|'+month_number+'|$3}}');
  input_string = input_string.replace(regexp4, '$1{{Date|$2|'+month_number+'|$3}}');
  input_string = input_string.replace(regexp5, '$1{{Date|$2|'+month_number+'}}');
  input_string = input_string.replace(regexp6, '$1{{Date|$2|'+month_number+'}}');
  
  return input_string;
}

/* returns author information, as found. Returns in vector form, as {(Entire author string), (author project code), (author username)}. If unable to parse project code or username, then they will be null */
function mto_parse_author_info(text)
{
  "use strict";

  var return_array = new Array(3);

  var authorstart = text.search(/^\s*\|\s*[Aa]uthor\s*\=\s*(.+?)\.?\s*$/m);
  return_array[0] = text.substring(authorstart, text.indexOf('\n', authorstart)).replace(/^\s*\|\s*[Aa]uthor\s*\=\s*(.+?)\s*\.?(?:\s*<\s*[Bb][Rr]\s*\/?>\s*)?\s*?$/, '$1');

  var username_array = /^.*\[\[(?:\s*\:?\s*([a-z]{1,2}))\s*\:[A-Za-z]?[a-z]+:(.*?)[\|\]](?:.\n*)*$/.exec(return_array[0]);
  if (username_array) //successful username extraction match
  {
    return_array[1]= username_array[1];
    return_array[2]= username_array[2];
  }
  return return_array;
}

function mto_uploader_is_author(text)
{
  "use strict";

  return text.replace(/(\|\s*[Aa]uthor\s*\=\s*.*?)\s*\.{0,2}(?:\s*<\s*[Bb][Rr]\s*\/?>)?\s*\n\s*([^\s\|\}].*\n\s*[\|\}])/, '$1 / $2');
}

function mto_uploader_isnt_author(text)
{
  "use strict";

  //original upload date is useless
  text = text.replace(/(\|[Dd]ate\s*\=.*?)\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\*?\s*\{\{\s*[Oo]riginal +upload +date\s*\|\s*[\d\-]+\s*\}\}(?:; *\{\{[Dd]ate[\d\|\s]+\}\} *\(last +version\)*)?/, '$1');

  //original author info is useless
  text = text.replace(/(\|\s*[Aa]uthor\s*\=\s*?.*?)\.?(?:\s*<\s*[Bb][Rr]\s*\/?>)?\s*(?:[Uu]ploaded\s+by\s+)?\[\[\s*\:?\s*([a-z]{1,2})\s*\:(?:[A-Za-z]?[a-z]+)\:(?:.+?)\s*(?:\|(?:.*?))\]\] at (?:\[http:\/\/)?\2\.w[\w\. ]+\]?.*\n/, '$1\n');
  return text.replace(/(\|\s*[Aa]uthor\s*\=\s*?.*?)\.?(?:\s*<\s*[Bb][Rr]\s*\/?>)?\s*Later +version\(s\) +were +uploaded +by\s*?\n?/, '$1\n');
}

//must process source cleanup in order to continue
function own_replace_pre(text)
{
  "use strict";

  text = text.replace(/Transferred from \[(http:\/\/[\w\-\. ]+)\]; transferr?ed to Commons/im, 'Transferred from [$1] to Commons');
  text = text.replace(/Transferred from \[(http:\/\/[\w\-\. ]+)\]; Transfer was stated to be made by \[\[(.+?)\]\]/im, 'Transferred from [$1] to Commons by [[$2]]');
  text = text.replace(/\|(\s*source\s*=\s*)transferred from \[(http:\/\/[\w\-\. ]+)\]((?: to commons by \[\[.+?\]\]\.?)?)\s*?(?:<\s*br\s*\/?>)?((?:\susing .+?)?)\s*\.?(?:\s*<\s*br\s*\/?\s*>)?\n+\s*\(original\stext\s*\:\s*('')?([\S\s]*?)\5\)(\s*(?:\|\s*(?:description|date|author|permission|other[ _]+versions|other[ _]+fields)\s*\=|\}\}))/im, '|$1$6<br/>\nTransferred from [$2]$3$4.$7');
  return text;
}

/* internal function only */
function own_replace(text, username, projcode, force)
{
  "use strict";

  //define functions
  function own_replace_1(text, internal_re, replace_text)
  {
    var re = new RegExp("(\\|\\s*source\\s*\\=[^\\S\\n]*(?!\\|))("+internal_re+".*?)(?:\\s*?<\\s*[Bb][Rr]\\s*\\/?>)?\\s*?(\\n?(?:transferr?ed +from +\\[http|\\{\\{\\s*[Tt]ransferred +from|\\(?Original(?:ly)? +uploaded +on +[a-z\\-]{2,}\\.wik).+)?\\n", "i");
    var re_parsed = re.exec(text);

    if (re_parsed!==null)
    {
      //just prepend under circumstances
      if (re_parsed[2].search(/(?:\s*transferr?ed +from +\[http|\{\{\s*[Tt]ransferred +from|\(?Original(?:ly)? +uploaded +on + [a-z\-]{2,}\.wik)/i)===0)
       {return text.replace(re, "$1"+replace_text+"<br/>$2\n");}
      //prepend plus original text
      else if (re_parsed[2].search(/\s*(?:\*\s*)?\{\{\s*(?:[Oo]wn|[Ss]elf(?:\-| +)photographed)\s*\}\}/)!==0 )
       {return text.replace(re, "$1"+replace_text+" ({{original text|1=$2|nobold=1}})<br/>$3\n");}
    }
    //we've decided not to parse for whatever reason
    return text;
  }
  function own_replace_2(text, internal_re, replace_text)
  {
    var re = new RegExp("(\\|\\s*source\\s*\\=[^\\S\\n]*)(?!\\|)("+internal_re+".*?)(?:\\s*?<\\s*[Bb][Rr]\\s*\\/?>)?\\n", "i");
    var re_parsed = re.exec(text);
    if (re_parsed!==null)
    {
      if (re_parsed[2].search(/\s*(?:\*\s*)?\{\{\s*(?:[Oo]wn|[Ss]elf\-photographed)\s*\}\}/)!==0 &&
        re_parsed[2].search(/(?:\s*transferr?ed +from +\[http|\{\{\s*[Tt]ransferred +from|\(?Original(?:ly)? +uploaded +on + [a-z\-]{2,}\.wik)/i)!==0)
       {return text.replace(re, "$1"+replace_text+" ({{original text|1=$2|nobold=1}})\n");}
      //just prepend under circumstances
      else if (re_parsed[2].search(/\|\s*source\s*\=\s*\*?\s*\{\{\s*(?:[Oo]wn|[Ss]elf(?:\-| +)photographed)\s*\}\}/)===0)
       {return text.replace(re, "$1"+replace_text+"<br/>$2\n");}
    }
    return text;
  }

  text = own_replace_pre(text);

  var re_anything = "[^\\S\\n]*[^\\|\\s].*?";
  var username_test_re = (projcode && username)?"|\\[\\[\\s*:\\s*"+(projcode?projcode.replace("-", "\\\\-"):/*dummy*/"")+"\\s*:\\s*[^\\s\\d\\+\\:\\|\\[\\]]+?\\s*:\\s*"+username+"\\s*\\|\\s*(?:[^\\s\\d\\+\\:\\|\\[\\]]+?\\s*\\:\\s*)?"+username+"\\s*\\]\\].*?":"";
  var sp_re_const = force==="selfphoto"?re_anything:"(?:selb(?:st|er)[\\s\\-]*(?:(?:ph|f)oto(?:gra(?:f|ph)ie(?:rt)?)?|aufgenommen|geknipst)|self[\\-\\s+]taken|i[^a-z]took[^a-z]this[^a-z](?:photo(?:graph)?|picture))\\.?";
  var own_re_const = force?re_anything:"(?:I\\,? .*created this (?:work|image) entirely by myself\\.?|self[\\-\\s+]made|own(?:\\s+work)?|(?:selb(?:st|er)|eigen(?:e|es)?)(?:\\s*(?:werk|(?:ph|f)oto(?:gra(?:f|ph)ie(?:rt)?)?|archiv|gezeichnet|erstellt|aufnahme|bild(?:er)?|arbeit))?|opera propria"+username_test_re+")\\.?";

  var text_bak = own_replace_1(text, sp_re_const, "{{self-photographed}}");
  if (text_bak===text)
  {
    text_bak = own_replace_2(text, sp_re_const, "{{self-photographed}}");
    if (text_bak===text)
    {
      text_bak = own_replace_1(text, own_re_const, "{{own}}");
      if (text_bak===text) 
      {
        text_bak = own_replace_2(text, own_re_const, "{{own}}");
        if (text_bak===text && force) 
        {
          text_bak = text=text.replace(/(\|\s*source\s*\=(?!\s*\{\{\s*(?:[Oo]wn|[Ss]elf(?:\-| +)photographed)\s*\}\}))/i, force==="selfphoto"?"$1{{self-photographed}}":"$1{{own}}");
        }
      }
    }
  }

  text = text_bak.replace(/\|(\s*[Ss]ource\s*\=\s*\{\{(?:[Oo]wn|[Ss]elf\-photographed)\}\}(?:\s*\(?\s*\{\{\s*[Oo]riginal +text\s*\|.+?\|\s*nobold\s*\=\s*1\s*\}\}\s*\)?)?)(?:\s*<\s*[Bb][Rr]\s*\/?>){2,}\s*/, '|$1<br/>\n');
  text = text.replace(/\|(\s*[Ss]ource\s*\=\s*\{\{(?:[Oo]wn|[Ss]elf\-photographed)\}\}(?:\s*\(?\s*\{\{\s*[Oo]riginal +text\s*\|.+?\|\s*nobold\s*\=\s*1\s*\}\}\s*\)?)?)\s*<\s*[Bb][Rr]\s*\/?>(\s*(?:\|\s*(?:description|date|author|permission|other[ _]+versions|other[ _]+fields)\s*\=|\}\}|\*))/, '|$1$2');
  return text;
}

function mto_self_photographed_wrapper(autosave_ignored)
{
  "use strict";

  var cleanup = document.editform.wpTextbox1.value;

  //parse comments/nowikis
  var cnwt = parse_comments_nowikis(cleanup);
  var cleanup2 = cnwt[0];
  var tokens = cnwt[1];
 
  cleanup2 = mto_own(cleanup2, true);

  //rebuild comments/nowikis
  cleanup2 = rebuild_comments_nowikis(cleanup2, tokens, false);
  document.editform.wpTextbox1.value=cleanup2;
}

function mto_own_wrapper(autosave_ignored)
{
  "use strict";

  var cleanup = document.editform.wpTextbox1.value;

  //parse comments/nowikis
  var cnwt = parse_comments_nowikis(cleanup);
  var cleanup2 = cnwt[0];
  var tokens = cnwt[1];
 
  cleanup2 = mto_own(cleanup2, false);

  //rebuild comments/nowikis
  cleanup2 = rebuild_comments_nowikis(cleanup2, tokens, false);
  document.editform.wpTextbox1.value=cleanup2;
}

function mto_own(text, selfphoto)
{
  "use strict";

  //author cleanup 
  text = mto_uploader_is_author(text);

  var authorinfo = mto_parse_author_info(text);
  var author = authorinfo[0];
  var project = authorinfo[1];
  var user = authorinfo[2];

  text = own_replace(text, user, project, selfphoto?"selfphoto":"own");

  //specific license cleanup
  if (user)
   {text = text.replace(/\{\{\s*pd\-release\s*\}\}/mi, "{{PD-user-w|"+project+"|wikipedia|"+user+"}}");}

  //{{Self}} cleanup
  text = text.replace(/\{\{\s*([Ss]elf2?|[Pp]ropio|[Сс]ебе|[Mm]ultilicense +replacing +placeholder(?: +new)?)\s*\|(?!.*\s*[Aa]uthor\s*\=[^\|]+\|\s*)(.*?)\}\}/m, "{{$1|author="+author+"|$2}}");
  text = iterative_replace(/\|(\s*[Ss]ource\s*\=\s*\{\{(?:[Oo]wn|[Ss]elf\-photographed)\}\}<br\/>)\s*<\s*[Bb][Rr]\s*\/?>\s*/, '|$1\n', text);
  return text;
}

// mto_cleanup tagger (initially developed by Patstuart/Magog the Ogre)
function mto_cleanup(autosave_ignored){
  "use strict";
  
  var hascat=false;
  var textbox = getParamValue('uploadformstyle')==="basic"?wpUploadDescription:document.editform.wpTextbox1;
  var cleanuptmp = textbox.value;
  var originaltext = cleanuptmp;

  //store comments, nowikis as is to avoid changing them 
  var cnwt = parse_comments_nowikis(cleanuptmp);
  var cleanup = cnwt[0];
  var tokens = cnwt[1];

  var de_wp_re = /\{\{Bild\-GFDL\-Neu\}\}\s*\r?\n\{\{(?:\s*self\s*\|\s*author\=.*\|)?Cc\-by\-sa\-3\.0\}\}\s*\r?\n\{\{Cc\-by\-sa\-3\.0\-de\}\}\s*\r?\n\{\{GFDL(?:\-user\-de(?:\|.*)?)?\}\}/i;

  cleanup = bot_move_checked(cleanup);
  cleanup = cleanup.replace(/&times;/g, '×');
  cleanup = cleanup.replace(/&#x7C;/g, '|');
 
  //because the new bot really is this dumb
  cleanup = cleanup.replace(/(\|\s*[Dd]escription\s*\=\s*(.+)[\s\S]+)\=\=\s*[Ss]ummary\s*\=\=\n+\2/, '$1');

  //Excessive wordage/markup in description
  cleanup = cleanup.replace(/\{\{\s*([a-z]{2,4}(?:\-[a-z]{5})?)\s*\|\s*(1\s*\=)?\s*(?:this\s+is\s+)?(?:an?\s+|the\s+)?(?:photo(?:graph)?|picture|image)\s+(?:taken\s+)?(?:of|depicting)\s+(\S)/ig, "{{$1|$2$3");
  cleanup = iterative_replace(/\{\{\s*([a-z]{2,4}(?:\-[a-z]{5})?)\s*\|\s*((?:.|\n?)+?)(?:\s*<\s*[Bb][Rr]\s*\/?>\n+)?(?:\s*\[\[\s*\:\s*\1\s*\:\s*(?:[Cc]ategor(?:y|ia)|[Cc]atégorie|[Kk]ategorie|[Кк]атегория)\s*\:[^\]]+\]\])+\s*\}\}/, "{{$1|$2}}", cleanup);
  cleanup = cleanup.replace(/\{\{(\s*[a-z]{2,4}(?:\-[a-z]{5})?\s*\|\s*(.+?))\s*(?:<\s*[Bb][Rr]\s*\/?>)?(\s*\n+\=\=[^\=]+\=\=)*\s*\}\}/m, "{{$1}}");
  cleanup = cleanup.replace(/\{\{\s*[a-z]{2,4}(?:\-[a-z]{5})?\s*\|\s*(\=+).*?\1\s*\}\}/, "");

  //capitalization of first letter of description
  cleanup = cleanup.replace(/(\{\{\s*(?:[Aa]a|[Aa]b|[Aa]ce|[Aa]f|[Aa]k|[Aa]ln|[Aa]ls|[Aa]m|[Aa]n|[Aa]ng|[Aa]r|[Aa]rc|[Aa]rn|[Aa]rz|[Aa]s|[Aa]st|[Aa]v|[Aa]vk|[Aa]y|[Aa]z|[Bb]a|[Bb]ar|[Ss]gs|[Bb]aybayin|[Bb]cc|[Bb]cl|[Bb]e|[Bb]e-tarask|[Bb]g|[Bb]h|[Bb]i|[Bb]jn|[Bb]m|[Bb]n|[Bb]o|[Bb]py|[Bb]r|[Bb]s|[Bb]ug|[Bb]xr|[Bb]yp|[Cc]a|[Cc]bk-zam|[Cc]do|[Cc]e|[Cc]eb|[Cc]g|[Cc]h|[Cc]ho|[Cc]hr|[Cc]hy|[Cc]o|[Cc]r|[Cc]rh|[Cc]rh-cyrl|[Cc]s|[Cc]sb|[Cc]u|[Cc]v|[Cc]y|[Dd]a|[Dd]e|[Dd]ia|[Dd]iq|[Dd]sb|[Dd]v|[Dd]z|[Ee]e|[Ee]l|[Ee]ml|[Ee]n|[Ee]o|[Ee]s|[Ee]t|[Ee]u|[Ee]xt|[Ff]a|[Ff]f|[Ff]i|[Ff]il|[Ff]iu-vro|[Ff]j|[Ff]o|[Ff]r|[Ff]rc|[Ff]rp|[Ff]rr|[Ff]ur|[Ff]y|[Gg]a|[Gg]ag|[Gg]an|[Gg]d|[Gg]l|[Gg]lk|[Gg]n|[Gg]ot|[Gg]sw|[Gg]u|[Gg]v|[Hh]a|[Hh]ak|[Hh]aw|[Hh]e|[Hh]i|[Hh]if|[Hh]mn|[Hh]o|[Hh]r|[Hh]sb|[Hh]t|[Hh]u|[Hh]y|[Hh]z|[Ii]a|[Ii]d|[Ii]e|[Ii]g|[Ii]i|[Ii]k|[Ii]ke-latn|[Ii]lo|[Ii]o|[Ii]s|[Ii]t|[Ii]u|[Jj]a|[Jj]a2|[Jj]bo|[Jj]v|[Kk]a|[Kk]aa|[Kk]ab|[Kk]g|[Kk]i|[Kk]j|[Kk]k|[Kk]l|[Kk]m|[Kk]n|[Kk]o|[Kk]r|[Kk]rc|[Kk]rj|[Kk]s|[Kk]sh|[Kk]u|[Kk]v|[Kk]y|[Ll]a|[Ll]ad|[Ll]b|[Ll]be|[Ll]fn|[Ll]g|[Ll]i|[Ll]ij|[Ll]mo|[Ll]n|[Ll]o|[Ll]oz|[Ll]t|[Ll]tg|[Ll]v|[Ll]zh|[Mm]ap-bms|[Mm]df|[Mm]g|[Mm]h|[Mm]hr|[Mm]i|[Mm]k|[Mm]l|[Mm]n|[Mm]o|[Mm]r|[Mm]rj|[Mm]s|[Mm]t|[Mm]us|[Mm]wl|[Mm]y|[Mm]yv|[Mm]zn|[Nn]a|[Nn]ah|[Nn]al|[Nn]ap|[Nn]b|[Nn]ds|[Nn]ds-nl|[Nn]e|[Nn]ew|[Nn]g|[Nn]l|[Nn]n|[Nn]o|[Nn]on|[Nn]ov|[Nn]rm|[Nn]so|[Nn]v|[Nn]y|[Oo]c|[Oo]m|[Oo]r|[Oo]s|[Pp]a|[Pp]ag|[Pp]am|[Pp]ap|[Pp]cd|[Pp]dc|[Pp]dt|[Pp]fl|[Pp]i|[Pp]ih|[Pp]l|[Pp]ms|[Pp]nb|[Pp]nt|[Pp]s|[Pp]t|[Pp]t-br|[Qq]u|[Rr]m|[Rr]my|[Rr]n|[Rr]o|[Rr]oa-rup|[Rr]oa-tara|[Rr]u|[Rr]ue|[Rr]w|[Ss]a|[Ss]ah|[Ss]c|[Ss]cn|[Ss]co|[Ss]d|[Ss]dc|[Ss]e|[Ss]ei|[Ss]es|[Ss]g|[Ss]h|[Ss]hi|[Ss]i|[Ss]k|[Ss]l|[Ss]li|[Ss]m|[Ss]ma|[Ss]n|[Ss]o|[Ss]q|[Ss]r|[Ss]r-ec|[Ss]r-el|[Ss]rn|[Ss]s|[Ss]t|[Ss]tq|[Ss]u|[Ss]v|[Ss]w|[Tt]a|[Tt]e|[Tt]et|[Tt]g|[Tt]gl|[Tt]h|[Tt]i|[Tt]im|[Tt]k|[Tt]n|[Tt]o|[Tt]okipona|[Tt]pi|[Tt]r|[Tt]s|[Tt]t|[Tt]um|[Tt]w|[Tt]y|[Tt]yv|[Uu]dm|[Uu]g|[Uu]k|[Uu]nknown +language|[Uu]r|[Uu]z|[Vv]e|[Vv]ec|[Vv]ep|[Vv]i|[Vv]ls|[Vv]o|[Ww]a|[Ww]ar|[Ww]o|[Ww]uu|[Xx]al|[Xx]h|[Yy]ai|[Yy]dd|[Yy]i|[Yy]i-latn|[Yy]o|[Yy]ue|[Zz]a|[Zz]ea|[Zz]h|[Zz]h-hans|[Zz]h-hant|[Zz]h-hk|[Zz]h-min-nan|[Zz]h-sg|[Zz]u|[Zz]xx)\s*\|\s*(?:1\s*\=\s*)?)(\S)(\S*)/gm, function($0, $1, $2, $3) {return $1 + (($2+$3).match(/^(?:(?:https?|gopher|news|ftp|irc):\/\/|mailto:)/)?$2:$2.toUpperCase()) + $3;});

  //pipe wikilinks in descriptions
  cleanup = iterative_replace(/\|(\s*[Dd]escription\s*\=(?:.|\n?)*?)\{\{\s*([a-z]{2,4}(?:\-[a-z]{5})?)\s*\|((?:.|\n?)*)\[\[\s*\:\s*\2\s*\:\s*([^\|\]]*?)\]\]((?:.|\n?)*\}\}(?:.|\n?)*?\|\s*(?:[Ss]ource|[Dd]ate)\s*)\=/, "|$1{{$2|$3[[:$2:$4|$4]]$5=", cleanup);

  //Empty descriptions
  cleanup = cleanup.replace(/(?:(\=)\s*)?\{\{\s*[a-z]{2,4}(?:\-[a-z]{5})?\s*\|\s*(?:1\s*\=\s*)?(?:\'\'no original description\'\'\s*)?\s*\}\}\s*$/mg, "$1");

  //internationalization
  cleanup = cleanup.replace(/\s*original upload date\s*(?!(?:\}\}|\|))/im, '{{original upload date}}');
  cleanup = cleanup.replace(/^(\=+) *li[cz]en(?:s(?:e|ing)(?:\s+information)?|za(?: +d\'uso)?|z) *\:? *\1\s*$/mig, '$1 {{int:license-header}} $1');
  cleanup = cleanup.replace(/^(\=+) *\{\{\s*(?:[Ii][Nn][Tt]|[Mm][Ee][Dd][Ii][Aa][Ww][Ii][Kk][Ii])\s*\:\s*[Ll]icense\s*\}\} *\:? *\1\s*?$/mg, '$1 {{int:license-header}} $1');
  cleanup = cleanup.replace(/^(\=+) *original upload (?:log|history) *\:? *\1\s*$/mig, '$1 {{Original upload log}} $1');
  cleanup = cleanup.replace(/^(\=+) *(?:summary|dettagli|beschreibung|beschreibung\W+quelle) *\:? *\1\s*$/mig, '$1 {{int:filedesc}} $1');

  //Magnus bot bug where it's transferring over a second and incorrect license
  cleanup = cleanup.replace(/((\=+) *\{\{int:licens[e|ing](?:\-header)?\}\} *\2[\s\S]+?)((\=+) *\{\{int:licens[e|ing](?:\-header)?\}\} *\4)(?:\s*\{\{.+?\}\})+\s*$/, "$1");

  //because the new bot by Jan Luca and Magnus Manske spams itself like crazy and thus creates two headrs
  cleanup = iterative_replace(/^(\=+) *\{\{int:filedesc\}\} *(?:\1(?:\s*\n)+)\1 *\{\{int:filedesc\}\} *\1\s*\n/mig, '$1 {{int:filedesc}} $1\n', cleanup);

  //rm various useless permission messages
  cleanup = cleanup.replace(/(\|\s*[Pp]ermission\s*\=[^\|\n]*?)(?:\s*?<\s*[Bb][Rr]\s*\/?>)?\n\(Original +text *: *(?:\'\')?(?:[Pp]ublic\s+[Dd]omain|GNU\s*\-?\s*FDL|[Yy][Ee][Ss]|[Jj][Aa]|[Ss][Ee][Ee]\s+[Bb][Ee][Ll][Oo][Ww]|[Bb]ild\-frei|[Ss]ee +license +section|(?:[Cc][Cc]\-(?:[Bb][Yy](?:\-[Ss][Aa])?(?:\-\d\.\d)?|zero)\,?\s*)+)\.?\s*(?:\'\')?\)\s*?\n(\s*?\|)/, "$1\n$2");
  cleanup = iterative_replace(/(\|\s*[Pp]ermission\s*\=)(?:(.+?);)?\s*(?:PD[A-Z0-9\-\| ]*\d*|CC\-(?:ZERO|BY(?:[\w\-\,\.]*)?)|(?:BILD\-)?GFDL[A-Z\-\|]*|ATTRIBUTION|NORIGHTSRESERVED|BILD\-BY|BSD|[Tt]his image is in the (?:\[\[)?public domain(?:\]\])?(?: due to its age| because it is ineligible for copyright)?|[Rr]eleased under the \[\[GNU Free Documentation License\]\]|GNU Free Documentation License 1\.2|[Rr]eleased into the public domain \(?by the author\)?|[Ll]icensed under the \[\[GFDL\]\] \(?by the author\)?|\-+)\s*(\;.*?)?\.?(?:<\s*[Bb][Rr]\s*\/?>)?\s*?(?:\s*\([Oo]riginal +text\s*\:\s*(\'\')([\S\s]*?)\4\)|\s*(\{\{\s*(?:[Pp]ermission *OTRS|[Oo]TRSPermission|[Cc]rediti|[Рр]азрешение +OTRS)\s*\|[\s\S]*?\}\})\s*)?(\n\s*\|(?:description|date|author|permission|other[ _]+(?:versions|fields)))/, '$1$2$3; $5$6$7', cleanup); //\| included due to early buggy bot moves
  cleanup = cleanup.replace(/(\|\s*[Pp]ermission\s*\=)\s*(\'\')?(?:see +license +section|see +below)\.?\s*\2\s*\n/i, '$1\n');
  cleanup = cleanup.replace(/(\|\s*[Pp]ermission\s*\=\s*);+\s*([^\s\|])/, '$1$2');
  cleanup = cleanup.replace(/(\|\s*[Pp]ermission\s*\=.*);+(?:\s*(\.))?(\s*\|(?:description|date|author|permission|other[ _]+(?:versions|fields)))/i, '$1$2$3');

  //wordage changes 
  cleanup = cleanup.replace(/\|(\s*source\s*\=.*\s*(?:\(?\s*)Original uploaded on [a-z]+\.wiki[a-z]+)(?:\s*\)?)((?:\s*\|\s*date\s*\=\s*.*)?)\s*\|\s*(\s*author\s*=\s*.+)\s*\(transferr?ed\s*by\s*(.+?)\)/im, '|$1 (transferred to commons by $4)$2\n|$3');
  cleanup = cleanup.replace(/\|(\s*source\s*\=.+\s*(?:\(\s*)Original(?:ly)? uploaded on [a-z]+\.wiki[a-z]+(?:\s*\)))((?:.|\n)+?)\/Original(?:ly)?\s+uploaded\s+by\s+.+?(\){0,1}\s*$)/im, '|$1 - $2$3');

  //author cleanup
  cleanup = cleanup.replace(/(\|\s*[Aa]uthor\s*\=\s*\[\[(?:\s*\:\s*[a-z]+){1,2}\s*\:[^\]\|]+\|[^\]\|]+\]\]) \d{2}\:\d{2}\, (?:\d\.? [^\]\|\(\d]+|\d\.? [^\]\|\(\d]+) \d+ \((?:CES?T|UTC)\)\.?/g, "$1");
  cleanup = cleanup.replace(/(?:Original upload(?:er was|ed by)|Uploaded by) (.+)/m, "$1");
  cleanup = cleanup.replace(/^(\|\s*[Aa]uthor\s*\=\s*)(.+?)\.?\s*(?:<\s*br\s*\/?>\s*)?\n+\s*\[\[\s*\:\s*([a-z]{2,4}(?:\-[a-z]{5})?\s*\:\s*.+?\s*\:\s*\2(?:\s*\|.*))\s*\]\]/mi, "$1[[:$3]]");
  cleanup = cleanup.replace(/^(\|\s*[Aa]uthor\s*\=\s*)\-*\s*\[\[\s*\:\s*([a-z]{2,4}(?:\-[a-z]{5})?)\s*\:\s*[^\]]+?\s*\:\s*([^\]]+?)(?:\s*\|[^\]]*)?\s*\]\](?:\s*\(?\[\[:\2\:[^\|\]\:]+?\:\s*\3\s*\|[^\|\]\:]+\]\]\)?)?(.*?)\s*\)?\s*\.?\s*(?:<\s*br\s*\/?>\s*)?\n+\s*(\[\[\s*\:\s*\2\s*\:\s*.+?\s*\:\s*\3(?:\s*\|.*)\s*\]\] .*)$/mi, "$1$5$4");
  cleanup = cleanup.replace(/^(\|\s*[Aa]uthor\s*\=.*) 0?\d{1,2}:\d{2}\, \d{1,2}\.? [^\d\[\]\.\,\s\(\)\'\;\"]{3}\.? \d{4} \((?:UTC|CES?T)\)?\.?/m, "$1");

  //check if original uploader appears not to be the author
  var uploader_status = 0;
  var UNKNOWN   = 1;
  var AUTHOR    = 2;
  var NOTAUTHOR = 4;
  var PD_UNKNOWN = "release|text(?: *logo)?|ineligible|trivial|uegnet|markenrecht|shape|simple|geometry|link|chem|author|because|reason|porque|reden";
  var PD_AUTHOR  = "self|users?|utente|own";
  var GFDL_AUTHOR= "users?|self";
  if (cleanup.match(new RegExp("\\{\\{\\s*(?:pd\\-(?:"+PD_UNKNOWN+")|cc|gfdl(?!\\-)|gfdl\\-(?!"+GFDL_AUTHOR+")|bild\\-pd\\-(?:frei|auteur))|attribution", "i")))
    {uploader_status |= UNKNOWN;}
  if (cleanup.match(new RegExp("\\{\\{\\s*(?:pd\\-(?:"+PD_AUTHOR+")|Јв\\-ја|مالکیت عمومی-خود |گنو-خود |Pd\\=self|self|gfdl\\-(?:"+GFDL_AUTHOR+"))", "i")) || de_wp_re.test(cleanup))
    {uploader_status |= AUTHOR;}
  if (cleanup.match(new RegExp("\\{\\{\\s*(?:pd\\-(?!"+PD_UNKNOWN+"|review|"+PD_AUTHOR+").+|larsencopyright)\\s*(?:\\}\\}|\\|)", "i")))
    {uploader_status |= NOTAUTHOR;}

  if (uploader_status === AUTHOR)
    {cleanup = mto_uploader_is_author(cleanup);}
  if (uploader_status === NOTAUTHOR)
    {cleanup = mto_uploader_isnt_author(cleanup);}
 

  var project_code; var username; 
  var authorinfo = mto_parse_author_info(cleanup);
  if (authorinfo)
  {
    var author = authorinfo[0];
    project_code = authorinfo?authorinfo[1]:null;
    username = authorinfo?authorinfo[2]:null;

    //{{Multilicense}} cleanup cleanup
    cleanup = cleanup.replace(/\{\{\s*([Mm]ultilicense +replacing +placeholder(?: +new)?)\s*\|(?!.*\s*[Aa]uthor\s*\=[^\|]+\|\s*)(.*?)\}\}/m, "{{$1|author="+author+"|$2}}");
  }

  
  //own template (internationalization/standardization). I am perfectly aware that some code is run twice, but whatever
  cleanup = own_replace_pre(cleanup);
  var cleanup2 = own_replace(cleanup, username, project_code, false);
  if (cleanup!==cleanup2)
  {
    cleanup=mto_own(cleanup2);

    //own_replace may have altered the text; worth a retry
    if (!username)
    {
      authorinfo = mto_parse_author_info(cleanup);
      if (authorinfo)
      {
        //var author = authorinfo[0]; ---- unneeded for now ----
        project_code = authorinfo?authorinfo[1]:null;
        username = authorinfo?authorinfo[2]:null;
      }
    }
  }

  //LOC-image automatic detection
  cleanup = cleanup.replace(/(?:\s*?<\s*[Bb][Rr]\s*\/?>)*\s*(?:http:\/\/)?[\w\-]+\.loc\.gov\/loc.pnp\/([\w\.]+\w*\d)\.?(?:\s*?<\s*[Bb][Rr]\s*\/?>)*\s*/, "{{LOC-image|id=$1}}");
  cleanup = cleanup.replace(/([\s\S]+)(\{\{\s*[Ll]OC\-image\s*\|\s*(?:(?:1|id)\=)?[\w\.]+\w\}\})([\s\S]+)(\|\s*[Ss]ource\s*\=)([\s\S]+)/, "$1$3$4$2$5");
  cleanup = cleanup.replace(/([\s\S]+)(\|\s*[Ss]ource\s*\=)([\s\S]+)(\{\{\s*[Ll]OC\-image\s*\|\s*(?:(?:1|id)\=)?[\w\.]+\w\}\})([\s\S]+)/, "$1$2$4$3$5");

  //OTRS template from de.wp cleanup
  cleanup = cleanup.replace(/\{\{\s*[Oo](?:TRS(?:[ _]+permission)?|trs)\s*\|\s*(?:1\s*\=)?(\d+)\s*(\|.*?\}\}|\}\})/, "{{PermissionOTRS|id=$1$2");

  //date fixup (rm needless text)
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)\d\d:\d\d\, (\d\d?) ([A-Z][a-z]+) (\d{4}) \(UTC\)(?:\s*<\s*[Bb][Rr]\s*\/?>)?\s*[\s\n]\s*\(?(\{\{\s*[Dd]ate\s*\|\s*\4\s*\|\s*\d\d\s*\|\s*0?\2\s*\}\})\s*\(\s*\{\{\s*[Oo]riginal +upload +date\s*\}\}\s*\)\s*\)?/, '$1$5 ({{original upload date}})');
  cleanup = cleanup.replace(/\|(\s*date\s*=\s*.*?\s*?)(?:\(Uploaded on Commons at )?[\d\- \:]+\(UTC\)\s*[\(\/]original(?:ly)? uploaded at (\d{4})\-(\d{2})\-(\d{2})[\s\d\:]+\)?/i, '|$1 {{Date|$2|$3|$4}} ({{original upload date}})');

  //circa/or/before/early/late/between date (also remove upload date if this is provided)
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)[Cc](?:(?:[Ii][Rr][Cc])?[Aa])?\.?\s*(\d{4}(?:\s*\-\s*\d{1,2}(?:\s*\-\s*\d{1,2})?)?)(\W)/, '$1{{circa|$2}}$3');
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)(\d{4})(?:\s*(\-)\s*(\d{1,2}))?(?:\s*(\-)\s*(\d{1,2}))?\s*[Oo][Rr]\s*(\d{4})(?:\s*(\-)\s*(\d{1,2}))?(?:\s*(\-)\s*(\d{1,2}))?\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\s*(?:\(?\{\{\s*[Dd]ate\s*\|\s*\d{4}s*\|\s*\d{1,2}s*\|\s*?\d{1,2}\s*\}\}\s*\(\s*\{\{\s*[Oo]riginal +upload +date\s*\}\}\s*\)\s*\)?)?\s*/, "$1{{other date|or|$2$3$4$5$6|$7$8$9$10$11}}\n");
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*\{\{circa\|.+\}\})\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\s*\(?\{\{\s*[Dd]ate[\|\s\d]+\}\}\s*\(\s*(?:\{\{\s*[Oo]riginal +upload +date\s*\}\}|first +version)\s*?\)(?:\s*\;?\s*\{\{\s*[Dd]ate[\|\s\d]+\}\}\s*\(\s*last\s+version\))?\s*?\)?/, '$1');
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)(?:[Bb][Ee][Ff][Oo][Rr][Ee]\s+|[Pp][Rr][Ee]\s*\-)(\d{4}(?:\s*\-\s*\d{1,2}(?:\s*\-\s*\d{1,2})?)?)\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\n\s*\(?\{\{\s*[Dd]ate\s*\|\s*\d{4}s*\|\s*\d{1,2}s*\|\s*?\d{1,2}\s*\}\}\s*\(\s*\{\{\s*[Oo]riginal +upload +date\s*\}\}\s*\)\s*\)?/, '$1{{other date|before|$2}}');
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)(?:[Ll][Aa][Tt][Ee]\s+|[Pp][Rr][Ee]\s*\-)(\d{4}(?:\s*\-\s*\d{1,2}(?:\s*\-\s*\d{1,2})?)?)\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\n\s*\(?\{\{\s*[Dd]ate\s*\|\s*\d{4}s*\|\s*\d{1,2}s*\|\s*?\d{1,2}\s*\}\}\s*\(\s*\{\{\s*[Oo]riginal +upload +date\s*\}\}\s*\)\s*\)?/, '$1{{other date|late|$2}}');
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)(?:[Ee][Aa][Rr][Ll][Yy]\s+|[Pp][Rr][Ee]\s*\-)(\d{4}(?:\s*\-\s*\d{1,2}(?:\s*\-\s*\d{1,2})?)?)\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\n\s*\(?\{\{\s*[Dd]ate\s*\|\s*\d{4}s*\|\s*\d{1,2}s*\|\s*?\d{1,2}\s*\}\}\s*\(\s*\{\{\s*[Oo]riginal +upload +date\s*\}\}\s*\)\s*\)?/, '$1{{other date|early|$2}}');
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)(?:[Bb][Ee][Tt][Ww][Ee]{2}[Nn])\s*(\d{4})(?:\s*(\-)\s*(\d{1,2}))?(?:\s*(\-)\s*(\d{1,2}))?\s*(?:[Aa][Nn][Dd]|\W)\s*(\d{4})(?:\s*(\-)\s*(\d{1,2}))?(?:\s*(\-)\s*(\d{1,2}))?\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?\s*(?:\(?\{\{\s*[Dd]ate\s*\|\s*\d{4}s*\|\s*\d{1,2}s*\|\s*?\d{1,2}\s*\}\}\s*\(\s*\{\{\s*[Oo]riginal +upload +date\s*\}\}\s*\)\s*\)?)?\s*/, "$1{{other date|between|$2$3$4$5$6|$7$8$9$10$11}}\n");

  cleanup = mto_parse_date(cleanup, "(?:jan(?:uary?|\\.)?|gennaio)", "01");
  cleanup = mto_parse_date(cleanup, "(?:feb(?:ruary?|\\.)?|febbraio)", "02");
  cleanup = mto_parse_date(cleanup, "(?:(?:mar(?:ch|\\.)?)|mär[z\\.]?|marzo)", "03");
  cleanup = mto_parse_date(cleanup, "(?:apr(?:il|\\.)?|aprile)", "04");
  cleanup = mto_parse_date(cleanup, "(?:ma[iy]|maggio)", "05");
  cleanup = mto_parse_date(cleanup, "(?:jun[ie\\.]?|giugno)", "06");
  cleanup = mto_parse_date(cleanup, "(?:jul[iy\\.]?|luglio)", "07");
  cleanup = mto_parse_date(cleanup, "(?:aug(?:ust|\\.)?|agosto)", "08");
  cleanup = mto_parse_date(cleanup, "(?:sept?(?:ember|\\.)?|settembre)", "09");
  cleanup = mto_parse_date(cleanup, "(?:o[ck]t(?:ober|\\.)?|ottobre)", "10");
  cleanup = mto_parse_date(cleanup, "(?:nov(?:ember|\\.)?|novembre)", "11");
  cleanup = mto_parse_date(cleanup, "(?:de[cz](?:ember|\\.)?|dicembre)", "12");

  //{{date}} for single year dates
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)(\d{4})(\s*\.?\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)?)$/m, "$1{{Date|$2}}$3");

  //fix single digit dates (first run)
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*\{\{\s*[Dd]ate\s*\|\d{4}\s*\|)(\d)(?!\d)/, "$10$2");
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*\{\{\s*[Dd]ate\s*\|\d{4}\s*\|\s*\d{2}\s*\|\s*)(\d)(?!\d)/, "$10$2");

  //rm parens around entirely original upload date
  cleanup = cleanup.replace(/\((\{\{[Dd]ate[^}]+\}\}\s*\(\{\{original upload date\}\}\))\)/, "$1"); 

  var i;
  for (i=0; i<2; i++)   //run twice to get both possible instances of date (the regex can conflict with itself); avoid changing format when undesired
  {
    /* A date written in format \d\d.\d\d.\d\d\d\d or \d\d.\d\d.\d\d is very common in Germany, apparently */
    if (project_code==="de")
    {
      cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(0?\d|[1-2][0-9]|3[0-1])\.(0?\d|1[0-2]|\d)\.(\d{4})(?!\d)/, '|$1{{Date|$4|$3|$2}}');
      cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(0?\d|[1-2][0-9]|3[0-1])\.(0?\d|1[0-2]|\d)\.([01]\d)(?!\d)/, '|$1{{Date|20$4|$3|$2}}'); /* FIXME: change this in the year 2020!! */
      cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(0?\d|[1-2][0-9]|3[0-1])\.(0?\d|1[0-2]|\d)\.([2-9]\d)(?!\d)/, '|$1{{Date|19$4|$3|$2}}');
    }

    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(1[3-9]|2\d|3[01])[\\\/\-\.\s\,]+(0?\d|1[0-2]|\d)[\\\/\-\.\s\,]+(\d{4})(?!\d)/mg, '|$1{{Date|$4|$3|$2}}');
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(0?\d|1[0-2]|\d)[\\\/\-\.\s\,]+(1[3-9]|2\d|3[01])[\\\/\-\.\s\,]+(\d{4})(?!\d)/mg, '|$1{{Date|$4|$2|$3}}');
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(0?\d|1[0-2]|\d)[\\\/\-\.\s\,]+\2[\\\/\-\.\s\,]+(\d{4})(?!\d)/mg, '|$1{{Date|$3|$2|$2}}');
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=[^\{]*\s*)(\d{4})\s*([\\\/\.\s])\s*(\d{1,2})\s*\3\s*(\d{2})(?!\d)/mg,  '|$1{{Date|$2|$4|$5}}');
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=[^\{]*\s*)(\d{4})\s*\-\s*(\d)\s*\-\s*(\d{2})(?!\d)/mg,  '|$1$2-0$3-$4');
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=[^\{]*\s*)(\d{4})\s*\-\s*(\d{2})\s*\-\s*(\d{2})(?!\d)(?!\s*\n)(?![\|}])/mg,  '|$1{{Date|$2|$3|$4}}');
  }
  for (i=0; i<2; i++)   //run twice to get both possible instances of date (the regex can conflict with itself); avoid changing format when undesired
  {
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(\d|1[0-2]|\d(?!\d))[\\\/\-\.\s\,]+(\d{4})(?!(?:[\\\/\-\.\s\,]+\d{1,2}|\d))/mg, '|$1{{Date|$3|$2}}');
    cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(?:[^\{]*\n)?)(\d{4})[\\\/\-\.\s\,]+(0\d|1[0-2]|\d(?!\d))(?!(?:[\\\/\-\.\s\,]+\d{1,2}|\d))/mg, '|$1{{Date|$2|$3}}');
  }

  //don't need to have upload date if a date is already provided
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*\{\{\s*[Dd]ate[\|\s\d]+\}\})\s*\.?(?:\s*\;?\s*<\s*[Bb][Rr]\s*\/?>)?\s*\(?(?:\{\{\s*[Dd]ate[\|\s\d]+\}\}\s*\(\s*(?:\{\{\s*[Oo]riginal +upload +date\s*\}\}|first +version)\s*?\)|\{\{\s*[Oo]riginal +upload +date\s*\|[\d\-\s]+\}\})(?:\s*\;?\s*\{\{\s*[Dd]ate[\|\s\d]+\}\}\s*\(\s*last\s+version\))?\s*?\)?/, '$1');

  //fix single digit dates, which confuses the parser below
  cleanup = cleanup.replace(/\{\{\s*[Dd]ate\s*\|\s*(\d+)\s*\|\s*(\d)\s*((?:\|\s*\d+\s*)?)\}\}/g, "{{Date|$1|0$2$3}}");
  cleanup = cleanup.replace(/\{\{\s*[Dd]ate\s*\|\s*(\d+)\s*\|\s*(\d{2})\s*\|\s*(\d)\s*\}\}/g, "{{Date|$1|$2|0$3}}");

  cleanup = cleanup.replace(/\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\|(\d{1,2})\s*\|(\d{1,2})\s*\}\}\s*\(first version\)\s*;?\s*\{\{\s*[Dd]ate\s*\|\s*\1\s*\|\2\s*\|\3\s*\}\}\s*\(last version\)/, "{{Date|$1|$2|$3}} ({{original upload date}})");
  cleanup = cleanup.replace(/\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\|(\d{1,2})\s*\|(\d{1,2})\s*\}\}\s*\(first version\)\s*/m, "{{Date|$1|$2|$3}} ({{original upload date}})");
  cleanup = cleanup.replace(/\|(\s*[Dd]ate\s*\=\s*(\{\{[Dd]ate\|.+\}\}))\s*(?:<\s*br\s*\/?>)?\s*\n*\s*\(?\2\s*\(\s*\{\{\s*[Oo]riginal upload date\s*\}\}\s*\)?\)?/, "|$1");
  cleanup = cleanup.replace(/(?:\(\s*)?\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\|(\d{1,2})\s*\|(\d{1,2})\s*\}\}\s+\(?\{\{\s*[Oo]riginal upload date\s*\}\}\)?(?:\s*\))?/mg, "{{original upload date|$1-$2-$3}}");
  cleanup =  cleanup.replace(/\d{2}:\d{2}\, +\d{1,2} +[A-Z][a-z]+ +\d{4} +\([A-Z]{3,4}\)(?:<\s*br\s*\/?>)?\s*\n*\s*(?:\(\s*)?(\{\{\s*[Oo]riginal +upload +date\s*\|\s*[\d\-]+\s*\}\})(?:\s*\))?/mg, "$1"); //~~~~~ for date is ignored if upload date present

  //{{date}} format -> xxxx-yy-zz format where possible
  cleanup = cleanup.replace(/\=(\s*)\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\|(\d{2})\s*\|(\d{2})\s*\}\}(\s*\|)/mg, "=$1$2-$3-$4$5");
  cleanup = cleanup.replace(/\=(\s*)\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\|(\d{1,2})\s*\}\}(\s*\|)/mg, "=$1$2-$3$4");
  cleanup = cleanup.replace(/\=(\s*)\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\}\}(\s*\|)/mg, "=$1$2$3");

  //|date=YYYY-MM-DD HH:MM:SS is improperly parsed above; fix that
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)\{\{\s*[Dd]ate\s*\|\s*(\d{4})\s*\|\s*(\d{2})\s*\|(\d{2})\s*\}\}\s*(\d{2}\:\d{2}(?:\:\d{2})?)/, "$1{{ISOdate|$2-$3-$4 $5}}");
  cleanup = cleanup.replace(/(\|\s*[Dd]ate\s*\=\s*)\{\{\s*[Ii]SOdate\s*\|\s*(\d{4}\-\d{2}\-\d{2} +\d{2}\:\d{2}(?:\:\d{2})?)\s*\}\}(\s*(?:\|\s*(?:[Dd]escription|[Dd]ate|[Aa]uthor|[Pp]ermission|[Oo]ther[ _]+versions|[Oo]ther[ _]+fields)\s*\=|\}\}))/, "$1$2$3");

  //rm unnecessary comments, templates that upload bot needlessly transports over; __NOTOC__
  cleanup = cleanup.replace(/^\s*\{\{\s*(?:[Tt]emplate[\s_]+other|[Tt]ls?x?p?|FULLROOTPAGENAME|[Nn]s[\s_]+has[\s_]+subpages|\!\)|\(\!|\!\!?|[a-zA-Z]?[Mm]box|[Jj]ULIANDAY|[Ll]an|[Rr]ed|[Ee]n|[Mm]ax(?:\/2)?|[Cc]ite[\s_]+book|[Cc]itation\/core|[Cc]itation\/make[\s_]+link)\s*\|?\s*\}\}\s*$/gm, '');
  cleanup = cleanup.replace(/\{\{\s*([Pp][Dd]\-ineligible|[Pp]D\-[Tt]rivial|[Pp]D\-uegnet|[Pp]D\-Ineligible)\s*\|\s*Commons\s*\=[^\}\|]+\}\}/, "{{$1}}");
  if (!cleanup.match(/\{\{\s*[Cc]heck\s+categories/))
    {cleanup = cleanup.replace(/^__NOTOC__\s*?\n/gm, '');}

  cleanup = cleanup.replace(/\{\{P[Dd]\-user\-w\s*\|\s*(als|ar|az|bg|ca|cs|da|de|en|es||fa|fi|fr|he|hi|hr|hu|it|ja|lt|ml|nl|nn|no|pl|pt|ro|ru|sk|sl|th|uk|vls|zh)\s*\|\s*wikipedia\s*\|\s*([^\|}]+)\s*\}\}/m, '{{PD-user-$1|$2}}');
  cleanup = cleanup.replace(/\{\{P[Dd]\-user\s*\|\s*([^\|}]+)\s*\|\s*(als|ar|az|bg|ca|cs|da|de|en|es||fa|fi|fr|he|hi|hr|hu|it|ja|lt|ml|nl|nn|no|pl|pt|ro|ru|sk|sl|th|uk|vls|zh)\s*\}\}/m, '{{PD-user-$2|$1}}');

  //specific silliness reserved for de.wp transfers
  var self_author_text;
  if (username && project_code)
  {
    self_author_text = "[[:"+project_code+":"+"User:"+username+"|"+username+"]] at [http://"+project_code+".wikipedia.org "+project_code+".wikipedia]";
    cleanup = cleanup.replace(de_wp_re, "{{self|author="+self_author_text+"|cc-by-sa-3.0|cc-by-sa-3.0-de|GFDL|migration=redundant}}");
  }

  //GFDL cleanup: multiple GFDLs
  if (username && cleanup.match(/\{\{\s*(?:Bild\-)?GFDL\s*[\-\|}]/im))
  {
    var migration;
    if (cleanup.match(/\{\{\s*(?:Bild\-)?GFDL.*\|\s*migration\s*\=\s*opt\-out/im))
      {migration = "opt-out";}
    else if (cleanup.match(/\{\{\s*GFDL.*\|\s*migration\s*\=\s*not\-eligible/im))
      {migration = "not-eligible";}
    else if (cleanup.match(/\{\{\s*GFDL.*\|\s*migration\s*\=\s*relicense/im))
      {migration = "relicense";}
    else if (cleanup.match(/\{\{\s*GFDL.*\|\s*migration\s*\=\s*needs\-review/im))
      {migration = "needs-review";}
    else if (cleanup.match(/\{\{\s*GFDL.*\|\s*migration\s*\=\s*redundant/im))
      {migration = "redundant";}
    else if (cleanup.match(/\{\{\s*[Cc]c\-by\-sa\-3.0\-migrated[\-\.\,\w]*\s*\}\}/m))
      {migration = "relicense";}
    else if (cleanup.match(/\{\{\s*[Ll]icense +migration +not +eligible\s*\}\}/m))
      {migration = "not-eligible";}
    else if (cleanup.match(/\{\{\s*[Cc]c\-by\-sa\-(?:all|3.0[\-\.\,\w]*)\s*\}\}/m))
      {migration = "redundant";}
    else if (project_code==="de") //User:Leyo informs me this is always relicense
      {migration = "relicense";}
    else 
    {
      //not stated; do intelligent search based on most recent upload date
      var upload_date = /^\s*\=+\s*\{\{Original upload log\}\}\s*\=+\n+(?!\*).+?\n+\s*\*\s*(\d{4})\-(\d{2})\-\d{2}/im.exec(cleanup);
      if (upload_date===null)
        {upload_date = /^\s*\=+\s*\{\{Original upload log\}\}\s*\=+\n+(?!\*)[\s\S]+comment\s*\n+\s*\*\s*(\d{4})\-(\d{2})\-\d{2}/im.exec(cleanup);}
      
      //uploaded before November 1, 2008
      if (upload_date!==null && (upload_date[1]<=2007 || (upload_date[1]===2008 && upload_date[2]<11)))
        {migration="relicense";}
      //couldn't parse or later upload date
      else
        {migration="needs-review";}
    }
    var en_disclaimers=false;
    if (cleanup.match(/\{\{\s*GFDL[^}\|]*(?:en|\-with\-disclaimers)\s*[\|}]/m))
      {en_disclaimers = true;}

    var MTO_REPLACE_TMP = "%%%MTOTEMPTEXT%%%";
    cleanup = cleanup.replace(/\{\{\s*(?:Bild\-)?GFDL.*\}\}\s*\n?/im, MTO_REPLACE_TMP); //locate first instance
    cleanup = cleanup.replace(/\{\{\s*(?:Bild\-)?GFDL.*\}\}\s*\n?/gim, ""); //empty all others
    cleanup = cleanup.replace(MTO_REPLACE_TMP, "{{GFDL-user-"+project_code+
      (en_disclaimers?"-with-disclaimers":(project_code==="en"?"-no-disclaimers":""))+"|"+username+"|migration="+migration+"}}\n");
  }

  //Template loop for GFDL fix
  cleanup = cleanup.replace(/(\{\{\s*[Ss]elf2?\s*(?:\|[^\|\}]+)*\|)\s*GFDL\-(?:user\-[a-z]+\-with\-disclaimers|self\-with\-disclaimers|self\-en)/, "$1GFDL-with-disclaimers");
  cleanup = cleanup.replace(/(\{\{\s*[Ss]elf2?\s*(?:\|[^\|\}]+)*\|)\s*GFDL\-(?:self|user(?:\-[a-z]+)?)(?:\-no\-disclaimers)?/, "$1GFDL");
  cleanup = cleanup.replace(/(\{\{\s*[Ss]elf2?\s*(?:\|[^\|\}]+)*\|GFDL)\s*\|\s*GFDL\|/, "$1|");

  //unknown template
  cleanup = cleanup.replace(/(\|\s*date\s*\=\s*)(?:unknown(?: +date)?|not known|desconocido|desconhecido|unbekannt)\s*\.?\s*(?:(?:<\s*br\/?>)?\n?|\n)(?:(?!\|).+\n)?(\s*(?:\||\}\}))/i, "$1{{unknown|date}}\n$2");
  cleanup = cleanup.replace(/(\|\s*author\s*\=\s*)(?:unknown(?: +author)?|not known|desconocido|desconhecido|unbekannt)\s*\.?\s*(?:(?:<\s*br\/?>)?\n?|\n)(?:(?!\|).+\n)?(\s*(?:\||\}\}))/i, "$1{{unknown|author}}\n$2");

  //useless other_versions messages
  cleanup = cleanup.replace(/(\|\s*other[_ ]versions\s*\=\s*)(?:no|none(?:\s+known)?|nein|yes|keine|\-+)\s*\.?\s*\n(\s*(?:\||\}\}))/i, "$1\n$2");

  //duplicate templates
  cleanup = cleanup.replace(/\{\{\s*(?:[Cc]c\-by\-sa\-3.0\-migrated|[Ll]icense +migration +not +eligible)\s*\}\}\s*?\n?/, '');
  cleanup = iterative_replace(/\{\{\s*(?:[Tt]rademark(?:ed)?|[Tt][Mm]|[Ss]VG\-Logo|®)\s*\}\}([\s\S]*)\{\{\s*(?:[Tt]rademark(?:ed)?|[Tt][Mm]|[Ss]VG\-Logo|®)\s*\}\}/, "{{Trademarked}}$1", cleanup);
  cleanup = iterative_replace(/\{\{\s*(?:[Mm]oney\-US|[Pp]D\-USGov\-money)\s*\}\}([\s\S]*)\{\{\s*(?:[Mm]oney\-US|[Pp]D\-USGov\-money)\s*\}\}/, "{{PD-USGov-money}}$1", cleanup);
  cleanup = iterative_replace(/\{\{\s*(?:[Pp]D\-UKGov|[Pp]D\-BritishGov|[Dd]omaine[ _]+public[ _]+UK|[Dd]omainePublicGouvUK|[Pp]D\-UK\-Gov|نگاره[ _]+بریتانیا)\s*\}\}([\s\S]*)\{\{\s*(?:[Pp]D\-UKGov|[Pp]D\-BritishGov|[Dd]omaine[ _]+public[ _]+UK|[Dd]omainePublicGouvUK|[Pp]D\-UK\-Gov|نگاره[ _]+بریتانیا)\s*\}\}/, "{{PD-UKGov}}$1", cleanup);
  cleanup = iterative_replace(/(\{\{\s*(?:[Cc]c\-by\-sa\-3\.0\,2\.5\,2\.0,1\.0|[Cc]c\-by\-sa\-all)\s*\}\}\s*?){2,}/, "{{Cc-by-sa-3.0,2.5,2.0,1.0}}", cleanup);
  cleanup = iterative_replace(/\{\{\s*(?:[Pp]D\-China|[Pp]D\-cn|[Cc]hina\-PD)\}\}([\s\S]*?)\{\{\s*(?:[Pp]D\-China|[Pp]D\-cn|[Cc]hina\-PD)\s*\}\}/, "{{PD-China}}$1", cleanup);
  cleanup = iterative_replace(/\{\{\s*(?:PD\-USGov(\-[A-Za-z]+)(\-[A-Za-z\-]+?))\s*\}\}([\s\S]*?)\{\{\s*PD\-USGov\1?\2\s*\}\}/, "{{PD-USGov$2}}$3", cleanup);
  cleanup = iterative_replace(/\{\{\s*bad *jpe?g\s*\}\}([\s\S]*)\{\{\s*bad *jpe?g\s*\}\}/i, "{{badjpeg}}$1", cleanup);
  cleanup = iterative_replace(/\{\{\s*(?:(?:convert *to|to|should *be)? *svg|vectorize)\s*\}\}([\s\S]*)\{\{\s*(?:(?:convert *to|to|should *be)? *svg|vectorize)\s*\}\}/i, "{{Convert to SVG}}$1", cleanup);
  if (cleanup.search(/\{\{\s*(?:[Pp]D\-text(?: *|\-)logo|[Pp]d\-textlogo|[Tt]extlogo|[Pp]D-\Markenrecht)\s*(?:\|[\s\S]*?)?\}\}/) !==-1 && cleanup.search(/\{\{\s*(?:[Tt]rademark(?:ed)?|[Tt][Mm]|[Ss]VG\-Logo|®)\s*(?:\|[\s\S]*?)?\}\}/) !==-1)
   {cleanup = iterative_replace(/\{\{\s*Bild\-LogoSH\s*(?:\|[\s\S]*?)?\}\}\s*?\n?/, "", cleanup);}

  //templates which are unneeded immediately after license header 
  cleanup = iterative_replace(/((\=\=)\s*.+\s*\2\s*(?:\{\{[\s\S]+\}\}\s*)*\n)\s*\{\{\s*(?:description +missing|date|\-|clr)\s*\}\}\s*?\n?/i, "$1", cleanup);
  cleanup = iterative_replace(/((\=\=)\s*.+\s*\2\s*\n(?:\s*\{\{.+\}\}\s*\n)*)\s*\{\{\s*hidden\s*\}\}\s*?\n?/i, "$1", cleanup);

  //redundant pd-old
  cleanup = cleanup.replace(/\{\{\s*[Pp]D\-old\s*\}\}\s*?\n?\s*\{\{\s*[Pp]D\-old\-100\s*\}\}/, '{{PD-old-100}}');

  //pd-art cleanup
  cleanup = cleanup.replace(/\{\{\s*([Pp][Dd]\-[Aa]rte?|[Bb]ild\-PD\-Kunst|[Pp]D\-kunst)\s*\}\}\s*?\n?\s*\{\{\s*([Pp]D\-[^\|]+)\s*\}\}/, '{{$1|$2}}');
  cleanup = cleanup.replace(/\{\{\s*(?:[Pp][Dd]\-[Aa]rte?|[Bb]ild\-PD\-Kunst|[Pp]D\-kunst)\s*\|\s*[Pp][Dd]\-old\-(70|100)\s*\}\}/, '{{PD-art-$1}}');
  cleanup = cleanup.replace(/\{\{\s*(?:[Pp][Dd]\-[Aa]rte?|[Bb]ild\-PD\-Kunst|[Pp]D\-kunst)\s*\|\s*[Pp][Dd]\-old\s*\}\}/, '{{PD-art-70}}');
  cleanup = cleanup.replace(/\{\{\s*[Pp][Dd]\-US\s*\}\}\s*\{\{\s*([Pp][Dd]\-[Aa]rte?|[Bb]ild\-PD\-Kunst|[Pp]D\-kunst)\s*\}\}/, '{{$1|PD-US}}');

  //insert header, but only above an {{information}} template and if there is at least one other header 
  // on the page already, for asthetic purposes (request by Leyo)
  //TODO: what in the world is going on here?
  var headercheck = cleanup.replace(/([^{]|\s)\{(?!\{)/, "$1MTOWASHERE"); //removing markings which will confuse parser
  headercheck = cleanup.replace(/((?:\{\{)+)\{(?!\{)/, "$1MTOWASHERE"); 
  headercheck = cleanup.replace(/([^\}]|\s)\}(?!\})/, "$1MTOWASHERE"); 
  headercheck = cleanup.replace(/((?:\}\})+)\}(?!\})/, "$1MTOWASHERE"); 
  headercheck = iterative_replace(/\{\{(?:[^{]|\n)+?\}\}/g, "MTOTEMPLATE", headercheck); //only headers that aren't already inside a template will suffice, per what seems right to me
  if (headercheck.match(/\n(\=+).+\1\s*\n/))
    {cleanup = cleanup.replace(/^\s*(\{\{[Ii]nformation\s*\|)/, '== {{int:filedesc}} ==\n$1');}

  /* uploader_status was set wayyyy above; this is a second run (lazy coding) */
  if (uploader_status === NOTAUTHOR)
    {cleanup = mto_uploader_isnt_author(cleanup);}

  //further cleanup second upload type 
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=\s*.*)(\n\s*\|(?:[\s\S]*\|)?\s*[Aa]uthor\s*\=\s*.*)\s*(\([Tt]ransferr?ed +by +\[\[.+\]\]\))/, '$1 $3$2');
  cleanup = cleanup.replace(/(\(Originally uploaded on [a-z\-]{1,6}\.w[a-z]+)\)?\s*\-\s*\(?([Tt]ransferr?ed +by +\[\[.+\]\]\))/, '$1 - $2');
  cleanup = cleanup.replace(/([Aa]uthor\s*\=.*?)\s*(?:\([Tt]ransferr?ed +by +\[\[.+\]\]\))/, "$1"); //sometimes leftover text from {{self}} template

  //{{original description page|...}}
  cleanup = cleanup.replace(/The\s+original\s+description\s+page\s+(?:is\/was|is|was)\s+\[http\:\/\/([a-z\-]+\.wik[a-z]+)\.org\/w\/index\.php\?title\=.+?(?:\:|%3[Aa])(.+?)\s+here(?:\]\.|\.\])\s+All\s+following\s+user\s+names\s+refer\s+to\s+\1\./g, "{{original description page|$1|$2}}");
  cleanup = cleanup.replace(/This file was originally uploaded at ([a-z\-]+\.wik[a-z]+) as \[http\:\/\/\1\.org\/wiki\/.+?(?:\:|%3[Aa])(\S+?)\s+[^\]]+\]\, before it was transferr?ed to Commons\./, "{{original description page|$1|$2}}");

  //{{transferred from|...}}
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=\s*|<\s*br\s*\/?\s*>\s*|\n\s*\*)Transferr?ed from \[(?:https?\:)?\/\/([a-z\-]{2,}\.w[a-z]+)\.org\/? \2\](?:(?:\; transferr?ed|; transfer was stated to be made)?(?: to Commons)? by \[\[User\:([^\]]+)\]\])?(?: using (\[\[?.+?\]\]?))?\.?/g, "$1{{transferred from|1=$2|2=$3|3=$4}}");
  cleanup = cleanup.replace(/\{\{transferred from\|1\=([^\|\=\}]*)\|2=([^\|\=\}]*)\|3\=([^\|\}\=]*)\}\}/g, "{{transferred from|$1|$2|$3}}");  
  cleanup = cleanup.replace(/\{\{transferred from\|(1\=)?(.*)\|(2\=)?(.*)\|(3\=)?\}\}/g, "{{transferred from|$1$2|$3$4}}");
  cleanup = cleanup.replace(/\{\{transferred from\|(1\=)?(.*)\|(2\=)?\}\}/g, "{{transferred from|$1$2}}");
  cleanup = cleanup.replace(/(\{\{transferred from\|.*?\|.*?\|\s*(?:3\s*\=)?\s*)\[(?:https?:)?\/\/(?:tools\.wikimedia\.de|toolserver\.org)\/~magnus\/commonshelper\.php ([Cc][Oo][Mm][Mm][Oo][Nn][Ss][Hh][Ee][Ll][Pp][Ee][Rr])\](\s*\}\})/, "$1$2$3");

  //{{self|self|self...|license}} bug
  cleanup = cleanup.replace(/\{\{(?:\s*[Ss]elf2?\s*\|){2,}/g, "{{self|");

  //{{user at project|...}}
  cleanup = cleanup.replace(/\[\[\:([a-z\-]{2,})(?:\:[A-Za-z]+)?\:[Uu][Ss][Ee][Rr]\:([^\|\[\]]+)\|\2\]\]\s+at\s+\[(?:https?\:)?\/\/\1\.(w[a-z]+)\.org\/? [^\|\[\]]+\]/g, "{{user at project|1=$2|2=$3|3=$1}}"); 
  cleanup = cleanup.replace(/\[\[\:[A-Za-z]+\:([a-z\-]{2,})\:[Uu][Ss][Ee][Rr]\:([^\|\[\]]+)\|\2\]\]\s+at\s+\[(?:https?\:)?\/\/\1\.(w[a-z]+)\.org\/? [^\|\[\]]+\]/g, "{{user at project|1=$2|2=$3|3=$1}}"); 
  cleanup = cleanup.replace(/\{\{user at project\|1\=([^\]\|\=]+?)\|2=([^\]\|]+?)\|3=([^\]\|]+?)\}\}/g, "{{user at project|$1|$2|$3}}"); 

  //move category related stuff to bottom
  var categories="";
  cleanup = cleanup + "\n";
  var re_cat = /^([\s\S]*)\[\[\s*category\s*\:\s*([^\[\]]?)([^\|\[\]]*?)\s*(\|[^\|\[\]]*)?\]\](?:(\s*%%%MTOCOMMENT\d+%%%)?\s*?\n)?([\s\S]*?)$/i;
  while(1)
  {
    var nextcat = cleanup.replace(re_cat, function($0, $1, $2, $3, $4, $5) {return "[[Category:"+$2.toUpperCase()+$3+($4===undefined?"":$4)+"]]"+($5===undefined?"":$5)+"\n";;});
    if (nextcat===cleanup) {break;}
    hascat=true;
    categories=nextcat+categories;
    cleanup = cleanup.replace(re_cat, "$1$6");
    
  }
  var re_unc = /^([\s\S]*)\{\{\s*([Uu]ncat(?:egorized)?(?:\s*\|(?:.|\n)*?)*?)\s*\}\}(?:(\s*%%%MTOCOMMENT\d+%%%)?\s*?\n)?([\s\S]*?)$/;   
  var unc = cleanup.replace(re_unc, "{{$2}}$3\n");
  if (unc!==cleanup) //positive match: uncat present
  {
    //even if categories isn't empty, they might be hidden categories, so we don't want to remove {{uncat}}
    categories = unc + categories; 
    cleanup = cleanup.replace(re_unc, "$1$4");
  }
  var re_ds = /^([\s\S]*)\{\{\s*DEFAULTSORT\s*\:([^\[\]\{]*?)\}\}(?:(\s*%%%MTOCOMMENT\d+%%%)?\s*?\n)?([\s\S]*?)$/;
  var ds = cleanup.replace(re_ds, "{{DEFAULTSORT:$2}}$3\n");
  if (ds!==cleanup) //positive match: DEFAULTSORT present
  {
    categories = ds + categories; 
    cleanup = cleanup.replace(re_ds, "$1$4");
  }
  var re_chc = /^([\s\S]*)(\{\{\s*[Cc]heck +categories[^}]+\}\})(\s*%%%MTOCOMMENT\d+%%%)?(\s*?\n[\s\S]*?)$/;
  var chc = cleanup.replace(re_chc, "$2$3\n");
  if (chc!==cleanup) //positive match: checkcategories present
  {
    categories = chc + categories; 
    cleanup = cleanup.replace(re_chc, "$1$4");
  }
  var re_cs_chc = /^([\s\S]*)(%%%MTOCSCOMMENT\d+%%%)(\s*?\n[\s\S]*?)$/;
  var cs_chc = cleanup.replace(re_cs_chc, "$2\n");
  if (cs_chc!==cleanup) //positive match: commonsense comment present
  {
    categories = cs_chc + categories; 
    cleanup = cleanup.replace(re_cs_chc, "$1$3");
  }
  cleanup = cleanup.replace(/\s*$/, "\n\n"); //we want exactly one trailing newline character

  //remove duplicate categories
  categories = iterative_replace(/\[\[Category:(.+?)(\s*\|[^\]]*)?\]\](.*?)\n((?:.+\n)*?)\[\[Category:\1(?:\s*\|[^\]]*)?\]\](.*?)\n/, "[[Category:$1$2]]$3$5\n$4", categories);
  categories = iterative_replace(/^\s*%%%MTOCOMMENT\d+%%%s*$/m, "", categories);
  cleanup += categories;  

  //add {{int:license-header}} where no license header exists, and it unambiguously would be appropriate
  //first, search for end of {{Information}} template, if applicable
  var infoend = cleanup.search(/\{\{\s*[Ii]nformation/);
  for (var template_level=1; template_level>0 && infoend>-1;)
  {
    infoend+=2;
    var opennext  = cleanup.indexOf("{{", infoend);
    var closenext = cleanup.indexOf("}}", infoend);

    if (closenext===-1)
     {infoend=-1; break;}
    else if (opennext<closenext && opennext!==-1)
     {infoend=opennext;  template_level++;}
    else
     {infoend=closenext; template_level--;}      
  }
  if (infoend===-1) {infoend=0;}
  else {infoend+=2;}
  var cleanup_pre = cleanup.substr(0, infoend);
  var cleanup_post = cleanup.substr(infoend);
  cleanup_post = cleanup_post.replace(/^((?:\s*?\{\{\s*(?:(?:should *be|(?:convert *)?to) *(?:svg|png)|svg|vectorize|artifacts|blurry|low quality[\w\-\.\, ]*|wrong +license|disputed[\w\-\.\, ]*|bad *(?:jpe?g|gif|svg)|crop|cleanup[\w\-\.\, ]*|ifc|(?:object +|globe +)?location[\w\-\.\, ]*)\s*(?:\|\s*[^}]+)?\}\})*)\s*(\{\{\s*(?:fop[\w\-\.\, ]*|freedom +of +panorama|cc(?:\-zero|\-by[\w\-\.\, ]*)?|gfdl[\w\-\.\, ]*|pd[\w\-\.\, ]*|trademark|(?:image|remove +)water +mark|self2?|Propio|[Сс]ебе|jac|multilicense[\w\-\.\, ]*|attribution[\w\-\.\, ]*|fal[\w\-\.\, ]*|GPL[\w\-\.\, ]*|wik(?:i[mp]edia|isource|iquote|iversity|tionary)-screenshot|wikiportrait|wikimedia +project +screenshot|flickrr?eview|license *review|ipernityreview|openphotoreview|panoramioreview|PD\-?review|Picasa(?:review|web)|GFDL[\w\-\.\, ]*)\s*(?:\|\s*[^}]+)?\}\})/i, "$1\n== {{int:license-header}} ==\n$2");
  cleanup = cleanup_pre + cleanup_post;  

  //remove {{ImageUpload|...}} (outside comments)
  cleanup = cleanup.replace(/\{\{\s*[Ii]mageUpload\s*(?:\|.+?)?\}\}(?:(\n)\n*)?/, "$1");

  //rebuild <nowiki>, <!-- --> tags
  cleanup = rebuild_comments_nowikis(cleanup, tokens, true);

   //remove {{ImageUpload|...}} (inside comments)
  cleanup = cleanup.replace(/<\!\-\-\s*\{\{\s*[Ii]mageUpload\s*(?:\|.+?)?\}\}\s*\-\->(?:(\n)\n*)?/, "$1");

  //unnecessary transfer message for User:Boteas (per request from Leyo)
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=[\s\S]+?)\s*(?:<\s*[Bb][Rr]\s*\/?>\s*|\s+)[Tt]ransferr?ed +from +\[?[a-z\-\.\:\/ ]+\]? +to +[Cc]ommons +by +\[\[\s*[Uu]ser\s*\:\s*[Bb]oteas\]\] .+/i, "$1");
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=\s*[Tt]ransferr?ed +from +\[?[a-z\-\.\:\/ ]+\]? +to +[Cc]ommons) +by +\[\[\s*[Uu]ser\s*\:\s*[Bb]oteas\]\](?: +using +\[[^\]]+\])?\.?\s*$/mi, "$1");

  //remove transfer message if it doesn't communicate anything, and other text isn't present
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=[\s\S]{3,}?)\s*(?:<\s*[Bb][Rr]\s*\/?>\s*|\s+)(?:[Tt]ransferr?ed +from +\[?[a-z\-\.\:\/ ]+\]?(?: +to +[Cc]ommons)?(?: +using +\[[^\]]+\])?|\{\{\s*[Tt]ransferred +from\s*\|[^\|]+?\}\})\.?\s*$/m, "$1");
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=.+?)\s*(?:<\s*[Bb][Rr]\s*\/?>\s*)(\|\s*(?:[Dd]ate|[Aa]uthor|[Pp]ermission|[Oo]ther[_ ]versions)\s*\=)/, "$1\n$2"); //rm extra <br>

  //move down transfer message
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=\s*)(\{\{\s*[Tt]ransferred[ _]+from\|.*?\}\})\.?\s*(?:<\s*[Bb][Rr]\s*\/?>)?\s*(\{\{\s*[Oo]riginal[ _]+text[\s\S]+?\=+\s*\{\{\s*[Oo]riginal[ _]+upload[ _]+log\s*\}\}\s*\=+\s*)/, "$1$3$2 ");
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=\s*\S[\s\S]+?\s*)(?:\s*<\s*[Bb][Rr]\s*\/?>\.?)?\s*\*?\s*\(\s*(\{\{\s*[Tt]ransferred[ _]+from\|.*?\}\})\s*\)\.?([\s\S]+?\{\{\s*[Oo]riginal[ _]+upload[ _]+log\s*\}\}\s*\=+\s*)/, "$1$3$2 ");
  cleanup = cleanup.replace(/(\|\s*[Ss]ource\s*\=\s*\S[\s\S]+?\s*?)(?:\s*<\s*[Bb][Rr]\s*\/?>\.?)?\s*\*?\s*(\{\{\s*[Tt]ransferred[ _]+from\|.*?\}\})\.?([\s\S]+?\{\{\s*[Oo]riginal[ _]+upload[ _]+log\s*\}\}\s*\=+\s*)/, "$1$3$2 ");

  //spacing 0
  cleanup = iterative_replace(/((\=+)\s*\{\{\s*int\s*\:\s*license\-header\s*\}\}\s*\2*\n(?:\s*?\{\{.+\}\}\n\s*?)*(?:\s*?\{\{.+\}\}\n\s*?))\s*\n+/, "$1", cleanup);  

  //spacing 1
  cleanup = cleanup.replace(/(dimensions\s*\|\s*comment)(\s*\n){2,}\*/i, '$1\n*'); //inexplicably, this bothers me beyond measure

  //completely empty nowikis... no reason for these
  cleanup = cleanup.replace(/\s*(?:\'\'|<small>)\s*<nowiki>\s*<\/nowiki>\s*(?:\'\'|<\/small>)\s*?\n/gm, '\n'); /* completely empty... just get rid of whole clause */

  cleanup = cleanup.replace(/\n<!\-\- Templates .+ do not appear to exist on commons. \-\->\s*\n/, '\n');
  if (hascat)
   {cleanup = cleanup.replace(/<\!\-\-\s*remove\s*this\s*line\s*once\s*you\s*have\s*added\s*categories\s*\-\->\s*/i, '');}

  //spacing 2 (need to take apart comments again)
  cnwt = parse_comments_nowikis(cleanup);
  cleanup = cnwt[0];
  tokens = cnwt[1];
  cleanup = cleanup.replace(/^\s+((\=+)\s*\{\{\s*[Ii]nt\s*\:\s*[Ff]iledesc\s*\}\}\s*\2\s*\n)/, '$1');
  cleanup = cleanup.replace(/\n+(\=+)(.+)\1\s*?\n+/g, '\n\n$1$2$1\n');
  cleanup = cleanup.replace(/\s*(\[\[Category:.*?\]\]|\{\{\s*[Uu]ncat(?:egorized)?[\s\S]*?\s*\}\}|\{\{\s*DEFAULTSORT\s*\:([^\[\]\{]*?)\}\}|\{\{\s*[Cc]heck +categories[\s\S]*?\s*\}\}|<\!\-\-\s*[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Ii][Ee][Ss]\s*[Bb][Yy]\s*(?:[Cc][Oo][Mm][Mm][Oo][Nn][Ss][Ee][Nn][Ss][Ee]|[Cc][Hh][Ee][Cc][Kk][Uu][Ss][Aa][Gg][Ee])\s*\-\->)/, '\n\n$1');
  cleanup = rebuild_comments_nowikis(cleanup, tokens, false /* unnecessary, done above */);

  textbox.value=cleanup;
  if (typeof(document.editform)!=="undefined" && originaltext!==cleanup)
  {
    var summary = "cleanup using [[User:Magog the Ogre/cleanup.js]]";
    if (document.editform.wpSummary.value.match(/^\s*$/))
      {document.editform.wpSummary.value += summary;}
    else if (!document.editform.wpSummary.value.match(/^.*cleanup using.*\s*$/m))
      {document.editform.wpSummary.value = document.editform.wpSummary.value.replace(/(.*?)\s*/m, "$1") + ' +' +summary;}
  }
}

function mto_cleanup_ts()
{
  "use strict";

  window.onbeforeunload = null; /* kill "are you sure you want to leave this page" warning */

  var protocol = document.URL.replace(/(.+?)\:\/\/.+/, "$1");
  var postform = document.createElement("form");
  postform.action=protocol+"://toolserver.org/~magog/do_cleanup.php";
  postform.method="post";

  var input = document.createElement("input");
  input.setAttribute("name", "image");
  input.setAttribute("value", wgPageName);
  postform.appendChild(input);
  if (typeof document.editform !=='undefined')
  {
    var textarea = document.createElement("textarea") ;
    textarea.setAttribute("name", "text");
    textarea.innerHTML=document.editform.wpTextbox1.value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
    postform.appendChild(textarea);
  }
  document.body.appendChild(postform);
  postform.submit();
  document.body.removeChild(postform);
}

function post_cleanup_ts(autosave_ignored)
{
  "use strict";
  
  var textbox = getParamValue('uploadformstyle')==="basic"?wpUploadDescription:document.editform.wpTextbox1;
  var cleanuptmp = textbox.value;
  var originaltext = cleanup;

  //store comments, nowikis as is to avoid changing them 
  var cnwt = parse_comments_nowikis(cleanuptmp);
  var cleanup = cnwt[0];
  var tokens = cnwt[1];

  //mark as bot move checked
  var cleanup = bot_move_checked(cleanup);

  //restore comments
  cleanup = rebuild_comments_nowikis(cleanup, tokens, false);

  //add to textbox
  textbox.value=cleanup;

  //make a null edit to the edit summary, to turn off the MediaWiki flag which incorrectly warns that the summary is empty
  document.editform.wpSummary.value += " ";
}

//stolen shamelessly and modified from the add {{information}} template text
function add_toolbox_button(text, _href) 
{
  "use strict";

  var toolbox = document.getElementById ( "p-tb" ) ;
  if ( !toolbox )
    {return ;}
  var ul = toolbox.getElementsByTagName ( "UL" ) [0] ;
  var li = document.createElement ( "LI" ) ;
  var a = document.createElement ( "A" ) ;
  a.appendChild (document.createTextNode(text));
  a.href = _href;
  a.className = "external" ;
  li.appendChild ( a ) ;
  ul.appendChild ( li ) ;
}
 
 
//stolen shamelessly and modified from the add {{information}} template text
function add_topbar_button(text, _href) 
{
  "use strict";

  mw.util.addPortletLink("p-cactions", _href, text, "");
}
function addFunction(functionNameString, buttonDisplayName, checkNameSpaceFlag, whereAdd, override)
{
  "use strict";

  checkNameSpaceFlag|= (wgPageName==="Commons:Sandbox");
  if (!checkNameSpaceFlag)
    {return;}
 
  if (getParamValue('functionName')===functionNameString)
  {
    addOnloadHook
    (
      function ()
      {
        eval(functionNameString+"(true)");
      }
    );
  }
 
  var _href;
  if (wgAction==="edit" || wgAction==="submit" || override === true) 
    {_href = "javascript:"+functionNameString+"(true)";}
  else
    {_href = wgScript + "?title="+encodeURIComponent(mw.config.get('wgPageName'))+ "&action=edit&functionName="+functionNameString;}
 
  if (whereAdd==="top")
    {addOnloadHook(function (){add_topbar_button(buttonDisplayName, _href);});}
  else if (whereAdd==="toolbox")
    {addOnloadHook(function (){add_toolbox_button(buttonDisplayName, _href);});}
}
 
function addImageFunction(functionNameString, buttonDisplayName, buttonLocation)
{
  "use strict";

  addFunction(functionNameString, buttonDisplayName, wgNamespaceNumber === 6, buttonLocation, false);
}

addFunction("mto_cleanup", "cleanup JS", wgNamespaceNumber === 6 || wgPageName === "Special:Upload", "toolbox", wgPageName === "Special:Upload");

if (wgNamespaceNumber===6)
{
  addOnloadHook(function (){add_toolbox_button("cleanup TS", "javascript:mto_cleanup_ts()");});
}
addImageFunction("post_cleanup_ts", null, null);

if (wgAction==="edit" || wgAction==="submit")  
  {addImageFunction("mto_own_wrapper", "{{own}}", "toolbox");}
if (wgAction==="edit" || wgAction==="submit")  
  {addImageFunction("mto_self_photographed_wrapper", "{{self-photographed}}", "toolbox");}
//</nowiki>