User:JarektBot/Commons creator maintenance.py

#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
Script for maintenance and automatic improvements of Creator templates and their home categories on Wikimedia Commons.
Specific tasks:
1) Creator template home-categories:
  * if multiple [[category:people by name]] then delete some
  * if creator template is missing then add it
  * Add birth  and death categories if needed
  * Add DEFAULTSORT if needed
2) Creator templates:
  * Add {{LangSwitch}} to "Name" field based on interwiki links if present in the home-category
  * If the current "Name" field has content that does not fit several predefined patterns then add
    {{LangSwitch}} to "Name1" field so it can be merged with "Name" field by hand
  * Add "Linkback" field if missing, fix if needed
  * Add "Option" field if missing, fix if needed
  * make sure all {{Authority Control}} templates use "|bare=1" option
  * move {{Authority Control}} templates from Category to Creator
  * If other edits are being made anyway, also perform a bunch of cosmetic edits.
 
Other possible tasks:
 * copy interwiki links from gallery to the category if needed (separate bot for all pages?)
 * update existing interwiki links inside "Name" {{LangSwitch}} block
 * copy nationality and occupation from Category to Creator if missing
 * copy nationality and occupation from Creator to Category if missing
 * copy {{Authority Control}} templates from German or English Wikipedia to Creator (done as separate bot)
 * copy data (dates, places, etc.) from English or German  Wikipedia.
 * tag problematic pages: autocategorizing, with <noinclude> blocks, with links in "Description", etc.
 
Other notes:
 * Run the bot after recent run of interwiki.py to ensure that all the links are up to date
"""
 
#
# (C) Jarekt, 2011
#
# Distributed under the terms of the MIT license.
#
 
import sys, os.path, glob, string, urllib2, json
import wikipedia as pywikibot
import pywikibot.textlib as textlib
import config, catlib, time, re
 
# ===============================================================
def Interwiki2LangSwitch(page, Gallery_name, nationality):
    """Build the text of a Creator "Name" field from a page's interwiki links.

    Inputs:
     * page         - Commons page carrying the interwiki links (usually the
                      creator's home category)
     * Gallery_name - likely title of a Commons gallery for the same person;
                      also used as the visible label of the single-link result
     * nationality  - language code that is tried first when a default
                      article has to be chosen

    Returns a (text, count) tuple:
     * (u'', 0) when the page has no interwiki links,
     * (one wiki link, 1) when there is exactly one link and no gallery,
     * ({{LangSwitch}} block, number of entries) otherwise.
    """
    commons = pywikibot.getSite(u'commons', u'commons')
    # Languages in order of preference for the "default" entry
    preferred = [nationality, "de", "fr", "nl", "it", "es", "pl", "pt"]
    best_rank = 10
    plain_name = re.sub(r'\([^\)]*\)', '', Gallery_name)  # drop disambiguation part
    fallback = plain_name
    english_present = False

    # Index the interwiki links by site so they can be sorted
    by_site = {}
    for link in page.interwiki():
        by_site[link.site()] = link
    if not by_site:
        return u'', 0

    rows = []           # one LangSwitch line per language
    single_link = None  # link returned when it turns out to be the only entry
    for site in textlib.interwikiSort(by_site.keys(), insite=commons):
        lang = site.lang
        link = by_site[site]
        target = link.title()
        label = re.sub(r'\s*\([^\)]*\)', '', link.sectionFreeTitle())
        if lang == 'en':
            # LangSwitch falls back to "en", so no explicit default is needed
            english_present = True
        elif lang == 'ru':
            # "LASTNAME, FIRST MIDDLE" -> "FIRST MIDDLE LASTNAME"
            flipped = re.search(r"([^\s\,]*)\, (.*)", label)
            if flipped is not None:
                label = flipped.group(2) + ' ' + flipped.group(1)
        if lang in preferred:
            rank = preferred.index(lang)
            if rank < best_rank:
                best_rank = rank
                fallback = u'[[:%s:%s|%s]]' % (lang, target, plain_name)
        rows.append(u'   | %s = [[:%s:%s|%s]]\n' % (lang, lang, target, label))
        single_link = u'[[:%s:%s|%s]]' % (lang, target, Gallery_name)

    count = len(rows)
    if count == 0:
        return u'', count

    # A real (non-redirect) gallery page becomes the default entry
    gallery = pywikibot.Page(commons, Gallery_name)
    gallery_found = gallery.exists() and not gallery.isRedirectPage()
    if gallery_found:
        pywikibot.output('[[%s]] exists' % Gallery_name)
        rows.append(u'   | default = [[' + Gallery_name + u']]\n')
        count += 1

    if count == 1:
        return single_link, count

    # Neither an English article nor a gallery: fall back to the best wiki
    if not english_present and not gallery_found:
        rows.append(u'   | default = ' + fallback + u'\n')

    return u'{{LangSwitch\n' + u''.join(rows) + u' }}', count
 
def Wikiproject2LangSwitch(links, name, letter, nationality):
    """Build the value of a sister-project field (Wikisource, Wikiquote, ...).

    Inputs:
     * links       - dict of link records; every key that contains `name` is
                     used, and the rest of the key is taken as the language
                     code. Each record must have a 'title' entry.
     * name        - key fragment identifying the project (e.g. 'source')
     * letter      - interwiki prefix of the project (e.g. 's')
     * nationality - language code preferred for the default entry

    Returns:
     * u'' when no key matches,
     * a bare link 'letter:lang:title' when exactly one language matches,
     * a {{LangSwitch}} block with one line per language (sorted by language
       code) otherwise. The block gets an explicit default when a link in the
       `nationality` language exists, or when there is no English link (then
       the first language in sorted order is used, so the result does not
       depend on dict iteration order).

    The returned text has no trailing newline and no blank lines, so it equals
    what ends up on the page after cosmetic clean-up.
    """
    lang_link = {}
    for subkey, subvalue in links.items():
        if name in subkey:
            lang = subkey.replace(name, '')   # what is left is the language code
            lang_link[lang] = u'%s:%s:%s' % (letter, lang, subvalue['title'])

    if not lang_link:
        return u''

    languages = sorted(lang_link)
    if len(languages) == 1:
        return lang_link[languages[0]]

    rows = u''.join(u'   |%s=%s\n' % (lang, lang_link[lang]) for lang in languages)

    default = lang_link.get(nationality, u'')
    if not default and 'en' not in lang_link:
        # no English entry for LangSwitch to fall back on: pick one deterministically
        default = lang_link[languages[0]]

    if default:
        return u'{{LangSwitch\n%s   |default=%s\n }}' % (rows, default)
    # the English entry acts as the implicit default
    return u'{{LangSwitch\n' + rows + u' }}'
 
 
 
#================================================
class CreatorMaintenanceBot:
 
  # === constructor ======================================================================
  def __init__(self, always=False):
  #initialize the object
    self.always       = False  # always save without asking: should be false
    self.dry_run      = False  # run the code without saving?
    self.iEditCount   = 0      # initialize edit counter
    self.maxEditCount = -200   # stop after number of edits (used only if >0)
    self.init()                # initialize the rest of attributes
 
  # === initialize ======================================================================
  def init(self):
  # list of atributes that need to be initialized for each page
  # All those values will be populated based on parsing of current Creator template
    self.homecat      = u""
    self.option       = u""
    self.sortkey      = u""
    self.deathdate    = u""
    self.birthdate    = u""
    self.deathyear    = u""
    self.birthyear    = u""
    self.deathyear1   = u""
    self.birthyear1   = u""
    self.authority    = u""
    self.name         = u""
    self.name1        = u""
    self.nationality  = u""
    self.type         = u""
    self.linkback     = u""
    self.wikisource   = u""
    self.wikiquote    = u""
    self.wikidata     = u""
    self.AC_template  = u""
    self.interwiki    = u""
 
  # === cosmetic_changes ======================================================================
  def creator_cosmetic_changes(self, creator_txt):
  # minor clean up changes to be performed only if page is going to be edited anyway
    creator_txt = creator_txt.replace('|PND=','|GND=')        # authority control template parameter
    creator_txt = creator_txt.replace('\n\n','\n')            # delete empty lines
    creator_txt = creator_txt.replace('maleale','male')       # clean up after some old bot runs
    creator_txt = creator_txt.replace('femaleemale','female')
 
    # spacing correction
    creator_txt = re.sub(ur'\n\s*\|\s*Name1\s*= *'            ,ur'\n | Name1             = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Name\s*= *'             ,ur'\n | Name              = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Alternative names\s*= *',ur'\n | Alternative names = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *'      ,ur'\n | Nationality       = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Gender\s*= *'           ,ur'\n | Gender            = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Occupation\s*= *'       ,ur'\n | Occupation        = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Description\s*= *'      ,ur'\n | Description       = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Birthdate\s*= *'        ,ur'\n | Birthdate         = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Birthyear\s*= *'        ,ur'\n | Birthyear         = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Birthloc\s*= *'         ,ur'\n | Birthloc          = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Deathdate\s*= *'        ,ur'\n | Deathdate         = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Deathyear\s*= *'        ,ur'\n | Deathyear         = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Deathloc\s*= *'         ,ur'\n | Deathloc          = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Workperiod\s*= *'       ,ur'\n | Workperiod        = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Workloc\s*= *'          ,ur'\n | Workloc           = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Image\s*= *'            ,ur'\n | Image             = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Sortkey\s*= *'          ,ur'\n | Sortkey           = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Homecat\s*= *'          ,ur'\n | Homecat           = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Option\s*= *'           ,ur'\n | Option            = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Linkback\s*= *'         ,ur'\n | Linkback          = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Authority\s*= *'        ,ur'\n | Authority         = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Wikisource\s*= *'       ,ur'\n | Wikisource        = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Wikidata\s*= *'         ,ur'\n | Wikidata          = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Wikiquote\s*= *'        ,ur'\n | Wikiquote         = ',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*([^=]*)\s*=\[\[w?\:\1\:',ur'\n   |\1=[[:\1:'          ,creator_txt) # spacing of lines in LangSwitch block
 
    # correct values of specific fields
    creator_txt = re.sub(ur'\n\s*\|\s*Gender\s*= *m *\n'              ,ur'\n | Gender            = male\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Gender\s*= *f *\n'              ,ur'\n | Gender            = female\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Ff]rench *\n' ,ur'\n | Nationality       = FR\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Gg]erman *\n' ,ur'\n | Nationality       = DE\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Ii]talian *\n',ur'\n | Nationality       = IT\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Gg]reek *\n'  ,ur'\n | Nationality       = GR\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Jj]apanese *\n',ur'\n | Nationality       = JP\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Dd]utch *\n'  ,ur'\n | Nationality       = NL\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Pp]olish*\n'  ,ur'\n | Nationality       = PL\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Rr]ussian*\n' ,ur'\n | Nationality       = RU\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Ss]panish*\n' ,ur'\n | Nationality       = ES\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Bb]ritish*\n' ,ur'\n | Nationality       = GB\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Nationality\s*= *[Aa]merican*\n',ur'\n | Nationality       = US\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Birthyear\s*= *\n',ur'\n',creator_txt)
    creator_txt = re.sub(ur'\n\s*\|\s*Deathyear\s*= *\n',ur'\n',creator_txt)
    return creator_txt
 
 
  # === Save page ======================================================================
  def save(self, text, page, comment, minorEdit=False, botflag=True):
    """Show the pending change and save it, asking the operator if required.

    text      - new page text
    page      - page to be written
    comment   - edit summary
    minorEdit - passed through to page.put()
    botflag   - passed through to page.put()

    Returns True only when the page was actually written. Returns False when
    the text is unchanged, in dry-run mode, when the operator declines, or
    when the page is locked / hits an edit conflict / is rejected by the
    spam filter. Exits the program when the operator chooses "Quit" or when
    the edit counter reaches self.maxEditCount.
    """
    # only save if something was changed
    old_txt = page.get()
    if text == old_txt:
      return False

    # Show the title of the page we're working on (highlighted in purple)
    # followed by the diff and the edit summary.
    pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                     % page.title())
    pywikibot.showDiff(old_txt, text)
    pywikibot.output(u'Comment: %s' %comment)

    if self.dry_run:
      return False

    if not self.always:
      choice = pywikibot.inputChoice(
          u'Do you want to accept these changes?',
          ['Yes', 'No', 'Always', 'Quit'],
          ['y', 'N', 'a', 'q'], 'N')
      if choice == 'a':
        self.always = True
      elif choice == 'q':
        # NOTE: uses the module-level sys. A function-local "import sys" here
        # would make sys a local name and break the sys.exit() call below
        # whenever this branch had not been executed.
        sys.exit()
      elif choice != 'y':
        return False

    try:
      # Save the page
      page.put(text, comment=comment,
               minorEdit=minorEdit, botflag=botflag)
      self.iEditCount += 1
      if self.iEditCount == self.maxEditCount:
        sys.exit()
    except pywikibot.LockedPage:
      pywikibot.output(u"Page %s is locked; skipping."
                       % page.title(asLink=True))
    except pywikibot.EditConflict:
      pywikibot.output(
          u'Skipping %s because of edit conflict'
          % (page.title()))
    except pywikibot.SpamfilterError as error:
      pywikibot.output(
          u'Cannot change %s because of spam blacklist entry %s'
          % (page.title(), error.url))
    else:
      return True
    return False
 
  # === Query CreatorLinks database ======================================================================
  def QueryCreatorLinks(self, creator_page, new_creator_txt, message):
    """Complete the Creator template with data from the CreatorLinks database.

    creator_page    - Creator page; its title is used as the query key
    new_creator_txt - current (possibly already modified) template text
    message         - edit summary collected so far

    Returns the tuple (new_creator_txt, message) with additions for birth and
    death dates, Wikidata, Authority control, Wikisource and Wikiquote when
    the database has data the template lacks. If the query fails or returns
    no links, whatever has been collected up to that point is returned.

    Side effects: self.authority is normalised (and set to an empty
    {{Authority control|bare=1}} when it was empty) and self.interwiki is
    rebuilt from the returned links.
    """
    # create query url and call CreatorLinks database
    name    = creator_page.title(underscore=True)
    urlstr  = u"http://creatorlinks.wmflabs.org/index.php?site=commons&format=json&title="+name
    urlstr  = urlstr.encode('utf-8')
    pywikibot.output(urlstr)
    try:
      content = urllib2.urlopen(urlstr).read()
      output_json = json.loads(content)
    except Exception:
      # best effort: network or parse problems must not stop the run, but
      # Ctrl-C / sys.exit() are no longer swallowed here
      pywikibot.output(u'CreatorLinks query failed; skipping database lookup')
      return new_creator_txt, message

    # add birthdate to creator template
    birth_date = output_json.get('birthYear', '')
    if birth_date is None:
      birth_date = u''
    if len(self.birthyear+self.birthdate)==0 and len(birth_date)>0:
      new_creator_txt = re.sub(r'Birthdate[^\n]*\n', r'Birthdate        = %s\n'%birth_date, new_creator_txt)
      message = message + u'Add Birthdate; '

    # add deathdate to creator template
    death_date = output_json.get('deathYear', '')
    if death_date is None:
      death_date = u''
    if len(self.deathyear+self.deathdate)==0 and len(death_date)>0:
      new_creator_txt = re.sub(r'Deathdate[^\n]*\n', r'Deathdate        = %s\n'%death_date, new_creator_txt)
      message = message + u'Add Deathdate; '

    # get links from the database
    links = output_json.get('links', '')
    if len(links)==0:
      return new_creator_txt, message

    # add wikidata to creator template
    wikidata = links.get('wikidata', '')
    if (len(self.wikidata)==0 and len(wikidata)>0): # if not already present in the template, but present in the DB
      pywikibot.output(u'wikidata = ' + wikidata['title'])
      if 'Wikidata' in new_creator_txt:
        new_creator_txt = re.sub(r'(Wikidata[^\n]*)\n', r'Wikidata          = %s\n'%wikidata['title'], new_creator_txt)
      else:
        new_creator_txt = re.sub(r'(\n\s*\|\s*)(Option\s*=[^\n]*)\n', r'\1Wikidata          = %s\n\1\2\n'%wikidata['title'], new_creator_txt)
      message = message + u'Add Wikidata; '

    # add or modify authority control data in to creator template
    AC_key = ['ulan','isni','bnf','viaf','lccn','gnd','nla'] # links to look up
    if len(self.authority)==0:
      self.authority=u'{{Authority control|bare=1}}'
    self.authority = self.authority.replace('|PND=','|GND=')
    # Delete empty fields. Two passes are needed: a match consumes the "|"
    # that starts the next field, so of two adjacent empty fields only the
    # first one is removed per pass.
    self.authority = re.sub(r'\|[^=]*= *\|', r'|', self.authority)
    self.authority = re.sub(r'\|[^=]*= *\|', r'|', self.authority)
    new_authority = self.authority
    pywikibot.output(new_authority)
    for key in AC_key: # add creatorlinks AC links
      lnk = links.get(key, '')
      if len(lnk)>0 and key.upper() not in new_authority:
        new_authority = new_authority.replace('|bare', '|%s=%s|bare' % (key.upper(), lnk['title']))
    if new_authority != self.authority:
      if 'Authority' in new_creator_txt:
        new_creator_txt = re.sub(r'(Authority[^\n]*)\n', r'Authority        = %s\n'%new_authority, new_creator_txt)
        message = message + u'Modify Authority control; '
      else:
        new_creator_txt = re.sub(r'(\n\s*\|\s*)(Linkback\s*=[^\n]*)\n', r'\1\2\1Authority         = %s\n'%new_authority, new_creator_txt)
        message = message + u'Add Authority control; '
    pywikibot.output(new_authority)

    # add or modify wikisource links in creator template
    wikisource = Wikiproject2LangSwitch(links, 'source', 's', self.nationality.lower())
    pywikibot.output(u'self.wikisource = '+self.wikisource)
    pywikibot.output(u'wikisource = '+wikisource)
    if len(wikisource)>0 and self.wikisource != wikisource.strip():
      if len(self.wikisource)>0:
        tmp = new_creator_txt
        # first try to replace an existing {{LangSwitch}} block ...
        new_creator_txt = re.sub(r'Wikisource\s*=\s*\{\{[Ll]ang[Ss]witch[^\}]*\}\}', r'Wikisource         = %s'%wikisource, new_creator_txt)
        if tmp == new_creator_txt:
          # ... otherwise an existing single "s:xx:Title" link
          new_creator_txt = re.sub(r'Wikisource\s*=\s*s:\w\w:.*', r'Wikisource         = %s'%wikisource, new_creator_txt)
        message = message + u'Modify Wikisource; '
      else:
        new_creator_txt = re.sub(r'(\n\s*\|\s*)(Option\s*=[^\n]*)\n', r'\1Wikisource         = %s\n\1\2\n'%wikisource, new_creator_txt)
        message = message + u'Add Wikisource; '

    # add or modify wikiquote links in creator template
    wikiquote = Wikiproject2LangSwitch(links, 'quote', 'q', self.nationality.lower())
    pywikibot.output(u'self.wikiquote = ' + self.wikiquote)
    pywikibot.output(u'wikiquote = ' + wikiquote)
    if len(wikiquote)>0 and self.wikiquote != wikiquote.strip():
      if len(self.wikiquote)>0:
        tmp = new_creator_txt
        new_creator_txt = re.sub(r'Wikiquote\s*=\s*\{\{[Ll]ang[Ss]witch[^\}]*\}\}', r'Wikiquote          = %s'%wikiquote, new_creator_txt)
        if tmp == new_creator_txt:
          # Wikiquote links use the "q:" prefix (this used to look for "s:")
          new_creator_txt = re.sub(r'Wikiquote\s*=\s*q:\w\w:.*', r'Wikiquote          = %s'% wikiquote, new_creator_txt)
        message = message + u'Modify Wikiquote; '
      else:
        new_creator_txt = re.sub(r'(\n\s*\|\s*)(Option\s*=[^\n]*)\n', r'\1Wikiquote          = %s\n\1\2\n'%wikiquote, new_creator_txt)
        message = message + u'Add Wikiquote; '

    # create interwiki text from every "<lang>wiki" key except wikidata
    lang_link = {}
    for subkey, subvalue in links.items():
      if 'wiki' in subkey:
        lang = subkey.replace('wiki','')              # language of the wikipedia
        if 'data' not in lang:
          wiki_link = u'[[%s:%s]]\n' % (lang, subvalue['title'])  # interwiki link
          lang_link[lang] = wiki_link

    self.interwiki = u''          # build interwiki block, one link per line
    for lang in sorted(lang_link):
      self.interwiki = self.interwiki + lang_link[lang]

    return new_creator_txt, message
 
 
  # === Edit category based on Creator page ======================================================================
  def EditCategory(self, cat_txt, creator_page, cat_page):
    ''' Update the Creator home category using data parsed from the Creator
    template (birth/death year, sort key, interwiki links).

    cat_txt      - current text of the home category
    creator_page - Creator page the category belongs to
    cat_page     - the home category page

    A category that is a redirect or is not in "People by name" is only
    recorded on the log page and left untouched.
    '''
    summary = u'Add '
    lowered = cat_txt.lower().replace('_',' ')   # lower case, "_" shown as spaces

    # Problematic home category: note it on the log page and stop
    if cat_page.isRedirectPage() or 'category:people by name' not in lowered:
      commons = pywikibot.getSite(u'commons', u'commons')
      log_page = pywikibot.Page(commons, u'User:Jarekt/Brudnopis')
      log_page.put(log_page.get()+u'\n*[[:'+cat_page.title()+']]', 'New Creators')
      return

    # "People by name" present both with and without a sort key: drop the keyed one
    has_plain_people = '[[category:people by name]]' in lowered
    if has_plain_people and '[[category:people by name|' in lowered:
      cat_txt = re.sub(u'\[\[Category:People by name\|[^\]]*\]\]', '', cat_txt, flags=re.IGNORECASE)
      summary = u'Delete category double; Add '

    # Minor fixes; they are part of the comparison baseline below, so on
    # their own they do not trigger a save
    cat_txt = cat_txt.replace('[[category:','[[Category:')
    found = re.search("{{DEFAULTSORT:([^\}]*)\}\}", cat_txt)
    if found is not None:
      # a DEFAULTSORT exists, so identical per-category sort keys are redundant
      existing_key = found.group(1).strip()
      cat_txt = cat_txt.replace('|'+existing_key+']]',']]')
    updated = cat_txt
    anchor = '[[Category:People by name'   # new categories are placed in front of this

    # Creator template missing
    if '{{creator:' not in lowered and '{{:creator:' not in lowered:
      updated = '{{'+creator_page.title()+'}}\n'+updated
      summary += u'creator template, '

    # Birth year category
    if 'births' not in cat_txt:
      born = self.birthyear if len(self.birthyear)>0 else self.birthyear1
      if len(born)>0:
        updated = updated.replace(anchor, '[[Category:'+born+' births]]\n'+anchor)
        summary += u'birth year category, '

    # Death year category
    if 'deaths' not in cat_txt:
      died = self.deathyear if len(self.deathyear)>0 else self.deathyear1
      if len(died)>0:
        updated = updated.replace(anchor, '[[Category:'+died+' deaths]]\n'+anchor)
        summary += u'death year category, '

    # DEFAULTSORT
    if '{{DEFAULTSORT:' not in cat_txt and len(self.sortkey)>0:
      updated = '{{DEFAULTSORT:'+self.sortkey+'}}\n'+updated
      summary += u'DEFAULTSORT, '

    # Interwiki links, only when the category has none yet
    existing_links = cat_page.interwiki()
    if len(existing_links)==0 and len(self.interwiki)>0:
      updated = updated + '\n' + self.interwiki
      summary += u'interwiki links, '

    # save changed text if any
    if updated != cat_txt:
      self.save(updated, cat_page, summary)
 
  # === Edit creator page ====================================================================== 
  def EditCreator(self, creator_txt, cat_txt, creator_page, cat_page):
    ''' Edit Creator template based on text in Creator home category.

    creator_txt  - current text of the Creator page
    cat_txt      - current text of the home category
    creator_page - the Creator page
    cat_page     - the home category (redirects already resolved by caller)

    Steps: correct Homecat, add a wikipedia link / {{LangSwitch}} to Name
    (or to Name1 when Name has an unrecognised form), add or fix Linkback
    and Option, make {{Authority control}} use "|bare=1", move
    {{Authority control}} from the category to the Creator template, query
    the CreatorLinks database, and finally save the page if it changed.
    '''
    message = u''
    new_creator_txt = creator_txt

    # follow redirect
    homecat_title = cat_page.titleWithoutNamespace()
    if homecat_title != self.homecat:
      new_creator_txt = re.sub(r'(Homecat\s*=)[^\n]*',
                               lambda m: m.group(1) + u' ' + homecat_title,
                               new_creator_txt)
      message = message + u'Correct homecat; '

    # Add {{LangSwitch}} to Name
    # person's name without disambiguation; strip the blank the removed
    # "(...)" leaves behind, otherwise the patterns below can never match
    Name = re.sub(r'\([^\)]*\)', '', creator_page.titleWithoutNamespace()).strip()
    if (len(self.name1)==0 and 'langswitch' not in self.name.lower()):
      NameLangSwitch, nLang = Interwiki2LangSwitch(cat_page, Name, self.nationality.lower())
      # nothing to do when there are no links or the field already has the target text
      if nLang>0 and self.name != NameLangSwitch:
        if nLang==1:
          msg = u'Add wikipedia link to Name; '
        else:
          msg = u'Add {{LangSwitch}} to Name; '

        field   = r'(Name\s*=\s*)'
        name_re = re.escape(Name)   # the title is data, not a regular expression
        # function replacement: NameLangSwitch is inserted literally, so
        # backslashes in titles are not read as group references
        put_switch = lambda m: m.group(1) + NameLangSwitch

        # Name given as a template {{<name>}} -> leave as is
        template_form = field + r'\{\{' + name_re + r'\}\}'
        # Forms that are replaced, tried in this order; the first hit wins
        replaceable = [
          field + name_re,                                                 # basic text
          field + r'\[\[' + name_re + r'\]\]',                             # gallery link
          field + r'\{\{w\|' + name_re + r'[^\}]*\}\}',                    # {{w|...}}
          field + r'\[\[:[Cc]ategory:' + name_re + r'\|' + name_re + r'\]\]', # category link
          field + r'\[\[[a-z\:]*' + name_re + r'\]\]',                     # bad wiki link
          field + r'\[\[[a-z\:]*[^\|]*\|' + name_re + r'\]\]',             # piped wiki link
        ]
        if re.search(template_form, new_creator_txt) is None:
          for pattern in replaceable:
            if pattern is replaceable[-1] and nLang==1:
              pywikibot.output('"%s", "%s"' % (self.name, NameLangSwitch))
            if re.search(pattern, new_creator_txt) is not None:
              new_creator_txt = re.sub(pattern, put_switch, new_creator_txt)
              message = message + msg
              break
          else:
            # Unrecognised content -> put the text into Name1, in front of the
            # Name field, for manual merging. The lookahead keeps the pattern
            # from also matching an existing "Name1" line.
            new_creator_txt = re.sub(
                r'(\n\s*\|\s*)Name(?=\s*=)',
                lambda m: m.group(1) + u'Name1 = ' + NameLangSwitch + m.group(1) + u'Name',
                new_creator_txt)
            message = message + msg.replace(u'Name',u'Name1')

    # Add Linkback
    if (len(self.linkback)==0):
      new_creator_txt = re.sub(r'(\n\s*\|\s*)(Homecat\s*=[^\n]*)\n', r'\1\2\1Linkback          = {{subst:FULLPAGENAME}}\n', new_creator_txt)
      message = message + u'Add Linkback; '
    else:
      if self.linkback!=creator_page.title() and '&#39;' not in self.linkback:
        new_creator_txt = re.sub(r'(Linkback\s*=\s*)[^\n]*', r'\1{{subst:FULLPAGENAME}}', new_creator_txt)
        message = message + u'Correct Linkback; '

    # Add Option
    if (len(self.option)==0):
      new_creator_txt = re.sub(r'(\n\s*\|\s*)(Homecat\s*=[^\n]*)\n', r'\1\2\1Option            = {{{1|}}} <!-- Do not modify -->\n', new_creator_txt)
      message = message + u'Add Option; '
    else:
      if ("{{{1|}}}" not in self.option):
        new_creator_txt = re.sub(r'(Option\s*=)[^\n]*', r'\1 {{{1|}}} <!-- Do not modify -->', new_creator_txt)
        message = message + u'Correct Option; '

    # Correct authority control
    if (len(self.authority)>0 and "|bare=1" not in self.authority):
      new_creator_txt = re.sub(r'(\{\{Authority control[^\}]*)\}\}', r'\1|bare=1}}', new_creator_txt)
      message = message + u'Correct authority; '

    # Move {{Authority control}} from the category to the creator template
    if (len(self.authority)==0 and u'Authority control' in cat_txt):
      m = re.search(r'(\{\{Authority control[^\}]*\}\})', cat_txt)
      if m is not None:
        ac_str = m.group(1).strip()
        cat_txt = cat_page.get()
        new_cat_txt = cat_txt.replace(ac_str,'')
        if new_cat_txt != cat_txt:
          self.save(new_cat_txt, cat_page, "move Authority control from category to creator template")
        if u'|bare=1' not in ac_str:
          ac_str = ac_str.replace('}}', u'|bare=1}}')
        if re.search(r'\n\s*\|\s*Authority\s*=', new_creator_txt) is not None:
          # (empty) Authority field already present: fill it instead of adding
          # a duplicate. Only the rest of that line is replaced.
          new_creator_txt = re.sub(r'(\n\s*\|\s*Authority\s*=)[^\n]*',
                                   lambda mo: mo.group(1) + u' ' + ac_str,
                                   new_creator_txt)
        else:
          new_creator_txt = re.sub(
              r'(\n\s*\|\s*)(Linkback\s*=[^\n]*)\n',
              lambda mo: mo.group(1) + mo.group(2) + mo.group(1) + u'Authority         = ' + ac_str + u'\n',
              new_creator_txt)
        message = message + u'Add Authority control; '
        # remember the moved template so that QueryCreatorLinks extends it
        # instead of replacing it with an empty one
        self.authority = ac_str

    new_creator_txt, message = self.QueryCreatorLinks(creator_page, new_creator_txt, message)

    # save changed text if any
    if new_creator_txt != creator_txt:
      new_creator_txt = self.creator_cosmetic_changes(new_creator_txt) # call this function only if page is being changed
      self.save(new_creator_txt, creator_page, message)
 
  #=================================================================
  def run(self, generator, start_title=u'Creator:Floris Balthasarsz. van Berckenrode'):
    ''' Main loop. Loop over all Creator templates delivered by the
    generator (pages of Category:Creator templates), parse the Creator
    template of each page and update the template and its home category.

    generator   - iterable of pages; only pages in namespace 100 (Creator)
                  are processed
    start_title - title of the first Creator page to process; every Creator
                  page before it is skipped (fast-forward after an
                  interrupted run). Pass None to process all pages. If the
                  title never shows up, no page is processed at all.
    '''

    # One regular expression per attribute that is read from the template
    # parameters; group 1 is the value. The keys are attribute names of self.
    creator_regexp = { "homecat"    : r"Homecat\s*=\s*([^\n]*)",
                       "option"     : r"Option\s*=\s*([^\n]*)",
                       "sortkey"    : r"Sortkey\s*=\s*([^\n]*)",
                       "deathdate"  : r"Deathdate\s*=\s*([^\n]*)",
                       "birthdate"  : r"Birthdate\s*=\s*([^\n]*)",
                       "deathyear1" : r"Deathdate\s*=\s*(\d\d\d\d?)",
                       "birthyear1" : r"Birthdate\s*=\s*(\d\d\d\d?)",   # was "birthyear2", which nothing reads
                       "deathyear"  : r"Deathyear\s*=\s*(\d\d\d\d?)",
                       "birthyear"  : r"Birthyear\s*=\s*(\d\d\d\d?)",
                       "authority"  : r"Authority\s*=\s*(\{\{Authority control[^\}]*\}\})",
                       "name"       : r"Name\s*=\s*([^\n]*)",
                       "name1"      : r"Name1\s*=\s*([^\n]*)",
                       "nationality": r"Nationality\s*=\s*([^\n]*)" ,
                       "type"       : r"Type\s*=\s*([^\n]*)" ,
                       "linkback"   : r"Linkback\s*=\s*([^\n]*)" ,
                       "wikisource" : r"Wikisource\s*=\s*([^\n]*)" ,
                       "wikiquote"  : r"Wikiquote\s*=\s*([^\n]*)" ,
                       "wikidata"   : r"Wikidata\s*=\s*([^\n]*)" }

    site = pywikibot.getSite(u'commons', u'commons')
    skip = start_title is not None   # fast-forward to the page of interest
    for creator_page in generator:       # page from Category:Creator templates category
      if creator_page.namespace() != 100:                   # make sure it is of correct namespace
        continue
      if skip:
        if creator_page.title() != start_title:
          continue
        skip = False

      creator_str = '{{'+creator_page.title()+'}}'          # creator string
      creator_txt = creator_page.get()
      pywikibot.output(u'Getting %s:' % creator_str)
      self.init()
      # {{{1|}}} is removed before the text is handed to the template parser
      templates = creator_page.templatesWithParams(creator_txt.replace('{{{1|}}}', ''))
      for template in templates:                            # get all the templates
        if (template[0] == 'Creator'):                      # make sure Creator template is present
          for param in template[1]:                         # go through all the template parameters
            for field in creator_regexp:
              m = re.search(creator_regexp[field], param)   # look for the field
              if m is not None:                             # if found
                setattr(self, field, m.group(1).strip())

        if (template[0] == 'Authority control'):
          self.AC_template = template[1]

      # Option is read from the raw text, where {{{1|}}} is still present
      m = re.search(creator_regexp['option'], creator_txt)
      if m is not None:
        self.option = m.group(1).strip()

      if len(self.homecat)>0:
        cat_name = 'Category:'+self.homecat
        pywikibot.output(u'   homecat = "%s"' % cat_name)
        pywikibot.output(u'   name = "%s"' % self.name)
        cat_page = catlib.Category(site, cat_name)
        if cat_page.exists():                        # if homecat exists
          if cat_page.isRedirectPage():
            cat_page = cat_page.getRedirectTarget()
            pywikibot.output(u"   Redirect -> %s" % cat_page.title(asLink=True))
          cat_txt = cat_page.get()                  # get the homecat text
          self.EditCreator (creator_txt, cat_txt, creator_page, cat_page)
          # EditCreator may have saved the category (Authority control moved
          # to the Creator template); reload it so that EditCategory does not
          # write the old text back
          cat_txt = cat_page.get(force=True)
          self.EditCategory(cat_txt, creator_page, cat_page)
 
 
 # ================================================             
def main():
    """Run the maintenance bot over all pages of Category:Creator templates."""
    commons = pywikibot.getSite(u'commons', u'commons')
    template_category = catlib.Category(commons, u'Category:Creator templates')
    # pages of the category, from the beginning
    creator_pages = template_category.articles(startFrom=None)
    CreatorMaintenanceBot().run(creator_pages)
 
 
# Script entry point: run the bot only when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # always executed, also when main() raises or calls sys.exit()
        # NOTE(review): presumably tells pywikibot that this bot has stopped - confirm
        pywikibot.stopme()