User:Pyfisch/Rob/source
< User:Pyfisch | Rob
import pywikibot
import re
# Group the file pages of the Commons category "Images by Rob Lavinsky" by
# the (size, locality URL) pair found in their wikitext, then report every
# group whose files come from more than one source site.  Such a group is a
# candidate set of duplicate uploads of one specimen.
commons = pywikibot.Site('commons', 'commons')
cat = pywikibot.Category(commons, 'Images by Rob Lavinsky')
# Namespace 6 is the File: namespace; content=True asks pywikibot to load
# the page text together with the listing.
members = cat.members(namespaces=[6], content=True)

# Maps (size, locality_url) -> list of (file_page, source_site) tuples.
same = {}

# Specimen dimensions such as "4.5 x 3.2 x 1.0 cm".
pattern_size = re.compile(r'[\d\.]*? x [\d\.]*? x [\d\.]*? cm')
# Link to a mindat.org locality page; every literal dot is escaped so it
# cannot match an arbitrary character.
pattern_loc = re.compile(r'http://www\.mindat\.org/loc-.*?\.html')

for file_page in members:
    # Fetch the wikitext once and reuse it for all three checks.
    text = file_page.get()

    size_obj = pattern_size.search(text)
    loc_obj = pattern_loc.search(text)
    # A missing size or locality is recorded as '' so that the key is
    # always a 2-tuple of strings.
    size = size_obj.group() if size_obj else ''
    loc = loc_obj.group() if loc_obj else ''
    key = (size, loc)

    # Pages that link to a mindat photo are labelled 'mindat'; everything
    # else is labelled 'irock'.
    if 'http://www.mindat.org/photo-' in text:
        source_site = 'mindat'
    else:
        source_site = 'irock'

    same.setdefault(key, []).append((file_page, source_site))

# Report only the groups that contain files from more than one source site.
# Each print() gets a single pre-formatted string, so the output is the same
# whether print is the Python 2 statement or the Python 3 function.
for key in same:
    entries = same[key]
    if len(set(site for _, site in entries)) > 1:
        print('==== %s %s ====' % key)
        for file_page, source_site in entries:
            print('* %s %s' % (source_site, file_page))
- Cheat sheet: regexes to replace lists with galleries.
- Find: \* (.+?) \[\[:File:(.*?)\]\]
- Insert: \2 \| \1