User:Panoramio Review Bot/panrb.py
<source lang="python">
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Zhuyifei1999
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
import os import re import time import urllib try:
from hashlib import sha1
except ImportError:
from sha import sha as sha1
import tempfile import pywikibot from pywikibot import catlib from pywikibot import pagegenerators
class Robot(pywikibot.Bot):
def __init__(self):
    """Prepare the site handle and the compiled patterns used by the bot.

    No arguments. The site comes from ``pywikibot.getSite()``; every
    regular expression is compiled once here and stored on the instance.
    """
    self.site = pywikibot.getSite()

    # Each pattern captures the numeric Panoramio photo id in group 1.
    # Literal dots are escaped so that e.g. "googleXcom" or "123xjpg"
    # can no longer satisfy a pattern by accident.
    self.idregex = [
        re.compile(
            r"https?://(?:[^/]+\.)?panoramio\.com/photo/(\d+)/?", re.I),
        re.compile(
            r"https?://(?:commondatastorage\.googleapis\.com/)?"
            r"static\.panoramio\.com/photos/"
            r"(?:original|large|medium|small|thumbnail|square|mini_square)/"
            r"(\d+)\.jpg", re.I),
        re.compile(
            r"https?://(?:[^/]+\.)?panoramio\.com/photo_explorer"
            r"#view=photo&position=\d+&with_photo_id=(\d+)", re.I),
        re.compile(
            r"https?://(?:[^/]+\.)?google\.com/[^/]*panoramio/photos/"
            r"[^/]+/(\d+)\.jpg", re.I),
    ]

    # Captures the link text of an <a ... rel="author"> element.
    self.authoregex = re.compile(r'<a href="[^"]+" rel="author">(.+?)</a>')

    # Matches the literal {{panoramioreview}} template, case-insensitively.
    self.review_template_regex = re.compile(
        r'(\{\{panoramioreview\}\})', re.S | re.I)

    # Captures the path segment after /licenses/ (group 1) and an "N.N"
    # version (group 2) from a creativecommons.org licence URL.
    self.cc_license_link = re.compile(
        r'https?\:\/\/creativecommons\.org\/licenses\/(.*?)\/([0-9]\.[0-9])\/')

    # Captures the template name of a {{cc-by...}} licence template,
    # ignoring any |parameters.
    self.cc_license_template_regex = re.compile(
        r'\{\{(cc\-by(?:\-sa)?(?:\-[0-9]\.[0-9])?)(?:\|.*?)?\}\}',
        re.S | re.I)
def getid(self):
    """Find the Panoramio photo id in ``self.text``.

    Tries the patterns in ``self.idregex`` in order. On the first hit the
    captured id is stored in ``self.id`` and 0 is returned. When nothing
    matches, ``self.returndata`` becomes ``("no_panoramio_link",)`` and 1
    is returned.
    """
    attempts = (pattern.search(self.text) for pattern in self.idregex)
    first_hit = next((found for found in attempts if found), None)

    if first_hit is None:
        self.returndata = ("no_panoramio_link",)
        return 1

    self.id = first_hit.group(1)
    return 0
def run(self):
    """Review and save every eligible page in the review-needed category.

    Walks the members of "Category:Panoramio_review_needed"; each page
    whose namespace number is 6 is wrapped in ``pywikibot.ImagePage``,
    stored as ``self.page`` and passed through ``review()`` and ``save()``.
    """
    review_category = catlib.Category(
        self.site, "Category:Panoramio_review_needed")
    candidates = pagegenerators.CategorizedPageGenerator(review_category)

    for candidate in candidates:
        # Namespace 6 is presumably the file namespace, given the
        # ImagePage wrapper below; everything else is skipped.
        if candidate.namespace() == 6:
            self.page = pywikibot.ImagePage(candidate)
            self.review()
            self.save()
def review(self):
    """Review ``self.page`` and leave the outcome on the instance.

    Loads the page text, resets the per-page state, extracts the photo id
    with ``getid()``, downloads the Panoramio photo page, reads the author
    name from it, then calls ``rev_sha1()`` and ``rev_license()`` in turn,
    stopping at the first one that returns a true value. If
    ``self.reupload`` is set afterwards, ``upload_hires()`` is called.

    Always returns ``None``; results are reported through
    ``self.returndata`` and the other instance attributes.
    """
    self.text = self.page.get()
    # Cleanup: forget whatever the previous page left behind.
    self.id = self.author = self.returndata = ()
    self.reupload = False

    if self.getid():
        # getid() has already recorded the failure in self.returndata.
        return

    try:
        # FIXME: I'm very sorry but Panoramio has no good api
        response = urllib.urlopen(
            "http://www.panoramio.com/photo/" + self.id)
        try:
            self.html = response.read().decode("utf-8")
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()

        # NOTE(review): the markup that originally surrounded this text
        # was lost from the file; only the visible text is matched here.
        # Confirm against the upstream copy.
        if "Photo Not Found" in self.html:
            self.returndata = ("panoramio_not_found",)
            return
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must still be able to stop the bot.
        self.returndata = ("size_not_found",)  # Should never happen
        return

    # NOTE(review): raises AttributeError when the downloaded page has no
    # rel="author" link, because search() then returns None.
    self.author = self.authoregex.search(self.html).group(1)

    if self.rev_sha1():
        return
    if self.rev_license():
        return
    if self.reupload:
        self.upload_hires()
def rev_license(self):
# NOTE(review): everything between the opening quote on the next line and the
# argument tuple further down is missing from this copy of the file, so the
# body of rev_license() and the start of the code that follows cannot be seen.
if '
- print "sim upload"
# NOTE(review): the remaining lines look like the tail of a separate routine.
# They fill a format string with the bot user name, self.returndata entries,
# the author, the photo id and a timestamp, substitute the result for the
# review template in `text`, log the outcome and build the edit summary.
# Where `tag` and `text` are defined is not visible here.
(self.site.username().replace("_", " "), self.returndata[0], self.author, self.id, time.strftime('%Y-%m-%d %H:%M:%S'), self.returndata[1], self.returndata[2]) text = self.review_template_regex.sub(tag, text) pywikibot.output(u'* %s %s' % (self.page.title(), " ".join(self.returndata))) comment = "Panoramio Review Bot: %s" % " ".join(self.returndata)
# Write the page; KeyboardInterrupt is re-raised, any other exception is
# logged and the page is left unsaved.
try: self.page.put(text, comment)
- self.userPut(self.page, text_o, text)
except KeyboardInterrupt: raise except Exception, e: pywikibot.output(u"Page %s not saved: %s" % (self.page.title(asLink=True), str(e)))
def main():
    """Handle pywikibot's command-line arguments, then run the bot once."""
    pywikibot.handleArgs()
    Robot().run()
# Script entry point: run the bot only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always called, even when main() raises.
        pywikibot.stopme()