# User:JarektUploadBot/UploadWGA.py

#!/usr/bin/python
# -*- coding: utf-8  -*-
'''
A program to upload all the images in the Web Gallery of Art website at http://www.wga.hu/
 
'''
import sys, os.path, glob, re, hashlib, base64, StringIO
sys.path.append("C:/Programs/pywikipedia/")
sys.path.append("../")
import wikipedia, upload, csv, urllib2, string

# Local directory holding the image files named by the FILENAME column.
_WGA_IMAGE_DIR = u'C:/Documents and Settings/tuszynskij/My Documents/Downloads/WGA/'

# CSV columns that are copied into the metadata dictionary as unicode text.
_WGA_TEXT_COLUMNS = (
    'CREATOR', 'DATE', 'TITLE', 'DIMENSIONS', 'TECHNIQUE',
    'FILENAME', 'FILENAME1', 'FORM', 'TYPE', 'SCHOOL', 'TIMELINE',
    'INSTITUTION', 'CREATOR_CAT', 'INSTITUTION_CAT', 'TITLE_CAT',
    'DATE_CAT', 'URL', 'IMAGEURL', 'FRAME',
)

# Wikitext of the file description page; filled in with the metadata
# dictionary through %-formatting.
_WGA_DESCRIPTION_TEMPLATE = u"""{{Artwork     
  |artist           = %(CREATOR)s
  |title            = {{en|%(TITLE)s}}
  |description      = 
  |date             = %(DATE)s
  |medium           = %(TECHNIQUE)s
  |dimensions       = %(DIMENSIONS)s
  |institution      = %(INSTITUTION)s
  |location         = <!-- location within the gallery/museum -->     
  |references       =
  |object history   =
  |credit line      =
  |inscriptions     =
  |notes            = 
  |accession number =
  |source           = {{WGA link|ID=%(IMG_ID)s|pic-url=%(IMAGEURL)s|info-url=%(URL)s}}     
  |permission       = {{PD-art|PD-old-100}}
  |other_versions   =
}}
%(FRAME)s
{{WGA tag|%(FORM)s|%(TYPE)s|%(SCHOOL)s|%(TIMELINE)s}}     
{{subst:#ifexist:Category:%(FORM1)ss by %(CREATOR_CAT)s|[[Category:%(FORM1)ss by %(CREATOR_CAT)s]]|[[Category:%(CREATOR_CAT)s]]}}     
{{subst:#ifexist:Category:%(FORM1)ss in the %(INSTITUTION_CAT)s|[[Category:%(FORM1)ss in the %(INSTITUTION_CAT)s]]|[[Category:%(INSTITUTION_CAT)s]]}}         
[[Category:%(TITLE_CAT)s]]
[[Category:%(DATE_CAT)s]]
[[Category:WGA form: %(FORM)s]]
[[Category:WGA type: %(TYPE)s]]
[[Category:WGA School: %(SCHOOL)s]]
[[Category:WGA time period: %(TIMELINE)s]]
"""

# Clean-up substitutions applied, in this order, to the formatted description.
_WGA_DESCRIPTION_FIXES = (
    (u"[[Category:]]\n", u""),               # category lines left empty by blank fields
    (u"[[Category:]]", u""),
    (u"{{}}", u""),                          # empty template calls
    (u"[[Category: ", u"[[Category:"),
    (u"cannvas", u"canvas"),                 # typo present in the data
    (u"\n\n", u"\n"),                        # blank lines left by empty fields
    (u"Paintings by  ", u"Paintings by "),
)


def _readMetadata(row, imageId, enc='utf-8'):
    """Build the metadata dictionary for one CSV row.

    Every column listed in _WGA_TEXT_COLUMNS is decoded from `enc` to
    unicode; a column that is absent from the row (csv.DictReader stores
    None for cells missing from a short row) becomes an empty string instead
    of raising TypeError.  FILENAME1, CREATOR and INSTITUTION are stripped of
    surrounding whitespace and FORM1 is added as the capitalized FORM.
    """
    metadata = {'IMG_ID': imageId}
    for column in _WGA_TEXT_COLUMNS:
        rawValue = row.get(column)
        if rawValue is None:
            rawValue = ''
        metadata[column] = unicode(rawValue, enc)

    metadata['FORM1'] = metadata['FORM'].capitalize()
    for column in ('FILENAME1', 'CREATOR', 'INSTITUTION'):
        metadata[column] = metadata[column].strip()
    return metadata


def _sha1OfFile(path):
    """Return the SHA1 hash of the file at `path` as upper-case hexadecimal."""
    imageFile = open(path, 'rb')
    try:
        content = imageFile.read()
    finally:
        # Always release the handle, even if the read fails.
        imageFile.close()
    return base64.b16encode(hashlib.sha1(content).digest())


def _buildDescription(metadata):
    """Format the description page wikitext for `metadata` and tidy it up."""
    description = _WGA_DESCRIPTION_TEMPLATE % metadata
    for old, new in _WGA_DESCRIPTION_FIXES:
        description = description.replace(old, new)
    return description


def processFile(row):
    """Upload the image described by one CSV row to Wikimedia Commons.

    `row` is a dictionary as produced by csv.DictReader, mapping column names
    to byte strings encoded in UTF-8.  The image named by the FILENAME column
    is looked up in the local download directory and uploaded under the name
    given in the FILENAME1 column.

    The row is skipped (a message is printed and nothing is uploaded) when
    IMG_ID is not a number, when the local image file does not exist, or when
    the target site already holds a file with the same SHA1 hash.

    Returns None.
    """
    try:
        imageId = int(row.get('IMG_ID'))
    except (TypeError, ValueError):
        # Missing or non-numeric id: skip this row rather than abort the batch.
        wikipedia.output(u'Skipping row without a numeric IMG_ID: %r' % (row,))
        return

    metadata = _readMetadata(row, imageId)

    targetSite = wikipedia.getSite('commons', 'commons')
    localPath = _WGA_IMAGE_DIR + metadata['FILENAME']
    fileUrl = u'file:///' + localPath
    filename = metadata['FILENAME1']

    # isfile() rather than exists(): an empty FILENAME would otherwise match
    # the download directory itself.
    if not os.path.isfile(localPath):
        wikipedia.output(u'File not found: %s' % localPath)
        return

    # We don't want to upload duplicates, so ask the target site for files
    # that have the same SHA1 hash as the local image.
    duplicates = targetSite.getFilesFromAnHash(_sha1OfFile(localPath))
    if duplicates:
        duplicateName = duplicates.pop()
        wikipedia.output(u'Duplicate image: %s' % duplicateName + u' = %(FILENAME)s' % metadata)
        return

    description = _buildDescription(metadata)

    wikipedia.output("================================================================================")
    wikipedia.output(u'Preparing upload for %s' % filename)
    wikipedia.output("================================================================================")

    # Upload the file to Commons.
    bot = upload.UploadRobot(url=fileUrl,
                             description=description,
                             useFilename=filename,
                             keepFilename=True,
                             verifyDescription=False,
                             targetSite=targetSite)
    bot.run()

        
def main(args):
    """Process every row of the batch CSV file with processFile().

    `args` holds the command-line arguments; it is accepted for the caller's
    convenience but is currently not used.  The CSV file name is fixed
    ('WGA_batch2d.csv', resolved against the current working directory).

    Exits the program through sys.exit() with a message naming the file and
    line number if the csv module reports a parsing error.
    """
    csvFile = 'WGA_batch2d.csv'

    csvHandle = open(csvFile, "rb")
    try:
        reader = csv.DictReader(csvHandle, dialect='excel', delimiter=',')
        try:
            for row in reader:
                processFile(row)
        except csv.Error, e:
            sys.exit('file %s, line %d: %s' % (csvFile, reader.line_num, e))
    finally:
        # Close the CSV file on every exit path, including sys.exit().
        csvHandle.close()
   
   
if __name__ == "__main__":
    # Run the batch with the command-line arguments (program name removed).
    # The closing message is printed whether main() returns normally or
    # leaves through an exception / sys.exit().
    cliArguments = sys.argv[1:]
    try:
        main(cliArguments)
    finally:
        print("All done!")