#!/usr/bin/python
# -*- coding: UTF-8 -*-
import shelve
import urllib
import simplejson
import time
import re
import mwclient
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Size of the "top N" list (the target page title below says "Top 200").
Howmany = 200
# Commons category whose members are examined.
Whichcategory = 'Category:Images that should use vector graphics'
# Title of the wiki page the generated report is saved to.
Wheretosave = u'Top 200 Images that should use vector graphics by usage'
# Path of the shelve cache that lets an interrupted run be resumed.
shelffile = "./catlistcount.cache"

# MediaWiki API endpoint and the query that lists the category members.
# A "cmcontinue" key is added to this dict at runtime to page through results.
apiurl = "http://commons.wikimedia.org/w/api.php"
apiparams = {
    'format': "json",
    'action': "query",
    'list': "categorymembers",
    'cmlimit': "50",  # members requested per API call
    'cmprop': 'title',
    'cmtitle': Whichcategory,
}

# CheckUsage endpoint and its request parameters.
checkusageurl = "http://toolserver.org/~daniel/WikiSense/CheckUsage.php"
checkusageparams = {
    # filename(s); starts empty and is filled with a newline-separated list
    # of file names before every request
    'i': '',
    'w': '_wp_20',  # which wikis to check (top 20 wikipedias not to kill server)
    'x': 'main',    # what kind of pages
    'r': 'on',      # RAW
    'b': '1',       # not Bulk, we check 1 by 1
}
# Recognises a "[name] digits" line in the CheckUsage response:
# group(1) is the text between the brackets, group(2) the (possibly empty)
# run of digits that follows it.
wikire = re.compile(r'\s*\[([^\]]*)\]\s*(\d*)')

# Persistent cache. writeback=True keeps fetched entries in memory so nested
# dicts can be mutated in place; they are flushed by sync()/close().
datastore = shelve.open(shelffile, writeback=True)
if "items" not in datastore:
    # Fresh cache file: start from scratch.
    datastore["items"] = {}
    datastore["all-done"] = False
elif "query-continue" in datastore:
    # Pick up where we left off last time. The key is only written once a
    # batch has produced a continuation token, so a cache left by a run that
    # stopped earlier (or one whose category fit in a single batch) does not
    # contain it; reading it unconditionally raised KeyError.
    apiparams["cmcontinue"] = datastore["query-continue"]
# Main harvest loop: fetch the category one batch at a time, cache every
# member, then ask CheckUsage where the files of that batch are used and
# tally the answers per file and per wiki. Progress is synced to the shelf
# after each step so an interrupted run can resume.
while not datastore["all-done"]:
    # --- 1. Fetch one batch of category members -------------------------
    query = urllib.urlopen(apiurl, urllib.urlencode(apiparams))
    try:
        data = simplejson.load(query)
    finally:
        query.close()
    members = data["query"]["categorymembers"]

    batch_files = []
    for item in members:
        title = item["title"]
        if item["ns"] != 0:
            # Drop only the namespace prefix; a bare split(':')[-1] would
            # also cut file names that themselves contain a colon.
            title = title.split(':', 1)[-1]
        Fname = title.encode('UTF-8').replace(' ', '_')
        entry = {"ns": item["ns"]}
        if item["ns"] == 6:  # pick out Image:
            batch_files.append(Fname)
            entry["countof"] = {}
            entry["counttotal"] = 0
            entry["checked"] = False
            print("Added: " + Fname)
        datastore["items"][Fname] = entry
    # CheckUsage receives the batch as newline-terminated file names.
    checkusageparams['i'] = "".join(name + "\n" for name in batch_files)
    datastore.sync()

    print("--- Cached Data ---")
    if members:  # an empty batch has no first/last title to show
        print("From: " + members[0]["title"].encode("UTF-8"))
        print("To: " + members[-1]["title"].encode("UTF-8"))

    # --- 2. Remember where to continue, or flag completion --------------
    if "query-continue" in data:
        apiparams["cmcontinue"] = data["query-continue"]["categorymembers"]["cmcontinue"].encode("UTF-8")
        datastore["query-continue"] = apiparams["cmcontinue"]
        datastore.sync()
    else:
        datastore["all-done"] = True

    # --- 3. Ask CheckUsage about this batch and tally the result --------
    query = urllib.urlopen(checkusageurl, urllib.urlencode(checkusageparams))
    try:
        lines = query.readlines()
    finally:
        query.close()

    whichwiki = None  # wiki named by the most recent header line, if any
    for line in lines:
        header = wikire.match(line)
        if header is not None:
            # Header line: the following usage lines belong to this wiki.
            print(header.group(1) + " : " + header.group(2))
            whichwiki = header.group(1)
            continue
        try:
            # Usage lines are taken to be exactly two whitespace-separated
            # fields (page, file); anything else is ignored.
            _page, used_file = line.split()
        except ValueError:
            continue
        if whichwiki is None:
            continue  # usage line before any header: nothing to attribute it to
        entry = datastore["items"].get(used_file)
        if entry is None or "countof" not in entry:
            continue  # not one of the cached files; previously a KeyError
        entry["countof"][whichwiki] = entry["countof"].get(whichwiki, 0) + 1
        entry["counttotal"] += 1
        print(used_file + "," + whichwiki + "," + str(entry["countof"][whichwiki]) + "," + str(entry["counttotal"]))
    datastore.sync()
    time.sleep(2)  # pause between batches to go easy on the servers
# Recompute the per-wiki usage totals from the per-file tallies, together
# with the grand total over all cached files (namespace 6 entries only).
datastore["wikis"] = {}
per_wiki = datastore["wikis"]  # the shelf's cached dict, mutated in place
ftotal = 0
for entry in datastore["items"].itervalues():
    if entry['ns'] != 6:
        continue  # only file entries carry usage counters
    ftotal += int(entry["counttotal"])
    for wiki, hits in entry['countof'].iteritems():
        per_wiki[wiki] = per_wiki.get(wiki, 0) + hits
# Assemble the report text: fixed introduction, timestamp, overall totals
# and a numbered list of wikis ordered by descending usage count.
intro = """
This Page is an Automatically generated list of the 200 most used
Images that should use vector graphics
The code for making this list is available here
The images are only checkd for use in Articles (not talk pages etc.) on the 20 largest wikipedias
--Inkwina (talk · contribs)
"""
stamp = time.strftime("%a, %d %b %Y %H:%M:%S %Z")

# Total usage as seen from the per-wiki side.
wtotal = sum(int(hits) for hits in datastore["wikis"].itervalues())

# (count, wiki) pairs, largest count first; wiki names are unique, so ties
# on the count are broken by name in descending order.
wikisort = sorted(
    ((hits, wiki) for wiki, hits in datastore["wikis"].items()),
    reverse=True)

pieces = [
    intro,
    "\nLast Update " + stamp + "\n",
    "\n*Items in Total: " + str(len(datastore["items"])),
    "\n**Total use(from wikis) : " + str(wtotal),
    "\n**Total use(from files) : " + str(ftotal),
    "\n----\n",
]
for hits, wiki in wikisort:
    pieces.append("\n# " + str(wiki) + ": " + str(hits))
output = "".join(pieces)
def mycmp(x, y):
    """Order two cached item names by total usage, highest first.

    x and y are keys of datastore["items"]. The result is negative when x
    has the larger "counttotal", positive when y has, and 0 on a tie.
    """
    uses_x = datastore["items"][x]["counttotal"]
    uses_y = datastore["items"][y]["counttotal"]
    return -cmp(uses_x, uses_y)


# Names of all cached files (namespace 6), most used first.
sortall = []
for name, entry in datastore["items"].iteritems():
    if entry["ns"] == 6:
        sortall.append(name)
sortall.sort(cmp=mycmp)
# Append the ranked image list and publish the page.
#
# NOTE(review): the original statement here was a string literal broken
# across two lines (a syntax error), and neither ``sortall`` nor ``Howmany``
# was consumed anywhere, so the report never listed the images themselves.
# The markup between the two "\n" halves appears to have been lost; a
# <gallery> of the top ``Howmany`` files is a reconstruction -- confirm the
# intended layout and caption format.
gallery = ["\n<gallery>\n"]
for name in sortall[:Howmany]:
    uses = datastore["items"][name]["counttotal"]
    gallery.append("File:" + name + "|" + str(uses) + " uses\n")
gallery.append("</gallery>\n")
output += "".join(gallery)

# Uncomment to preview the page text instead of (or before) uploading:
# print output

site = mwclient.Site('commons.wikimedia.org')
# Placeholder credentials: replace with the bot account's real login.
site.login("username", "passwd")
page = site.Pages[Wheretosave]
page.save(output, summary = u'Inkwina Bot Update')

# Flush the write-back cache (including the recomputed per-wiki totals) and
# release the shelf file.
datastore.close()