File:Edit rates by users and bots on wikimedia Commons 2012-07-20.svg

Original file(SVG file, nominally 2,160 × 1,106 pixels, file size: 123 KB)

Captions

Captions

Add a one-line explanation of what this file represents

Summary edit

Description
English: Plots showing how edit rates evolve on Wikimedia Commons, minute by minute, over the course of 24 hours beginning 2012-07-20 00:00 (UTC). The edits are sorted into user edits and edits by bots. Individual bots that never exceed 7 edits per minute are collapsed into Other bots (accumulated). Bots that edit at higher rates during this period are plotted individually. The total accumulated edit rate, combining bots and users, is also shown. The data was gathered using SlaungerBot to iterate through all bot changes and non-bot changes during this time period, accumulate the edits per minute, and then plot the data using Matplotlib.
Date
Source Own work
Author Slaunger
SVG development
InfoField
 
The source code of this SVG is invalid due to 25 errors.
 
This W3C-invalid plot was created with Matplotlib.
Source code
InfoField

Python code

from calendar import timegm
from time import strptime, strftime, gmtime

from numpy import arange
from pylab import plot, xlabel, ylabel, legend, grid, title, show, axhline, figure

import wikipedia as pywikibot

# Timestamp layout shared by the period bounds below and by the timestamps
# parsed from the recent-changes records.
datefmtstr = "%Y-%m-%dT%H:%M:%SZ"

# Period under study. The changes are walked backwards in time, so rcstart is
# the *newest* instant and rcend the *oldest* one.
rcend   = "2012-07-20T00:00:00Z"
rcstart = "2012-07-21T00:00:00Z"

# Divisor used to turn epoch seconds into a whole-minute index.
secs_per_min = 60

# Height of the dashed red reference line drawn on the plot.
default_max_bot_edit_rate_per_minute = 6

# user_edit_rates: minute index -> number of non-bot edits in that minute.
# bot_edit_rates:  bot name -> {minute index -> number of edits by that bot}.
user_edit_rates = dict()
bot_edit_rates = dict()

def recent_changes_iter(rcstart, rcend, rcshow):
    """Yield ``(utc_seconds, user)`` for every recent change in a period.

    The period is walked backwards in time, from ``rcstart`` down to
    ``rcend``, in windows of at most one hour. Each window is fetched through
    the module-level ``site`` object and announced on stdout.

    Args:
        rcstart: Newest timestamp of the period, formatted per ``datefmtstr``.
        rcend: Oldest timestamp of the period, same format.
        rcshow: Filter handed unchanged to ``site.recentchanges``
            (the script passes ``"bot"`` and ``"!bot"``).

    Yields:
        A tuple of the change's timestamp (``item[1]``) converted to seconds
        since the epoch in UTC, and ``item[4]`` of the record, which the
        calling code treats as the name of the editing account.
    """
    interval = 60 * 60
    oldest = timegm(strptime(rcend, datefmtstr))
    window_newest = timegm(strptime(rcstart, datefmtstr))
    while True:
        window_oldest = max(oldest, window_newest - interval)
        rc1str = strftime(datefmtstr, gmtime(window_newest))
        rc2str = strftime(datefmtstr, gmtime(window_oldest))
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the print() function.
        print("%s - %s" % (rc2str, rc1str))
        # NOTE(review): a window holding more than 5000 changes may be cut
        # short by `number` -- confirm how site.recentchanges applies it.
        for item in site.recentchanges(number=5000, rcshow=rcshow,
                                       rcstart=rc1str,
                                       rcend=rc2str):
            yield timegm(strptime(item[1], datefmtstr)), item[4]
        if window_oldest == oldest:
            break
        # The API's rcstart/rcend bounds are inclusive and timestamps have
        # one-second resolution, so the next window starts one second below
        # the previous lower bound. Reusing the bound itself would yield the
        # changes stamped exactly on it twice.
        window_newest = window_oldest - 1

# Collect per-minute edit counts for bots (per bot) and for regular users.
# pywikibot.stopme() is always called afterwards, even if fetching fails.
try:
    site = pywikibot.getSite()

    # Timestamps (epoch seconds) of the first and the last bot change yielded.
    # The variable names follow the backwards walk through time: the first
    # change yielded is stored as the *last* bot edit, and the final change
    # yielded as the *first* bot edit.
    last_bot_edit = None
    first_bot_edit = None
    for utc_sec, bot_str in recent_changes_iter(rcstart, rcend, rcshow="bot"):
        utc_min = utc_sec // secs_per_min
        edit_rates = bot_edit_rates.setdefault(bot_str, {})
        edit_rates[utc_min] = edit_rates.get(utc_min, 0) + 1
        if last_bot_edit is None:
            last_bot_edit = utc_sec
        first_bot_edit = utc_sec
    if first_bot_edit is None:
        # Without any bot edit the plotted time range cannot be derived;
        # fail with a clear message instead of an unrelated NameError.
        raise RuntimeError("No bot edits found between %s and %s"
                           % (rcend, rcstart))

    # Only the timestamp of non-bot changes is needed; the account is ignored.
    for utc_sec, _account in recent_changes_iter(rcstart, rcend, rcshow="!bot"):
        utc_min = utc_sec // secs_per_min
        user_edit_rates[utc_min] = user_edit_rates.get(utc_min, 0) + 1

finally:
    pywikibot.stopme()
    
# --- Aggregate the collected counts and plot them ---------------------------

# A bot exceeding this many edits in any single minute gets its own curve;
# every other bot is summed into the "Other bots (accumulated)" curve.
fast_bot_threshold = 7
fast_bots = frozenset(
    bot_str for bot_str, edit_rates in bot_edit_rates.items()
    if max(edit_rates.values()) > fast_bot_threshold
)

# The plotted range starts one minute after the minute of the earliest bot
# edit and stops one minute before the minute of the latest one.
# NOTE(review): presumably this trims minutes that are only partly covered
# by the data -- confirm.
utc_min_minimum = first_bot_edit // secs_per_min + 1
utc_min_maximum = last_bot_edit // secs_per_min - 1
plotted_minutes = range(utc_min_minimum, utc_min_maximum + 1)
# x axis: 0, 1, 2, ... one value per plotted minute.
minutes_after_first_bot_edit = arange(utc_min_maximum + 1 - utc_min_minimum)

accum_edit_rate = []
other_bots_edit_rates = []

figure(figsize=(12,9))

# Per minute: total of all edits (users + every bot) and the total of the
# bots that are not plotted individually.
for utc_min in plotted_minutes:
    edit_rate = user_edit_rates.get(utc_min, 0)
    other_bots_edit_rate = 0
    for bot_str, ber in bot_edit_rates.items():
        bot_rate = ber.get(utc_min, 0)
        edit_rate += bot_rate
        if bot_str not in fast_bots:
            other_bots_edit_rate += bot_rate
    accum_edit_rate.append(edit_rate)
    other_bots_edit_rates.append(other_bots_edit_rate)
plot(minutes_after_first_bot_edit, accum_edit_rate, '-', label="All edits", lw=2, alpha = 0.7)

# One curve per fast bot, in sorted name order so the legend is stable.
for bot_str in sorted(fast_bots):
    d = bot_edit_rates[bot_str]
    edit_rate_arr = [d.get(utc_min, 0) for utc_min in plotted_minutes]
    # The slice drops the first two and the last character of the repr.
    # NOTE(review): presumably bot_str is a unicode object, so this strips
    # the u'...' wrapper and leaves an escaped name -- confirm.
    plot(minutes_after_first_bot_edit, edit_rate_arr, ls="-", lw=2.5, label=repr(bot_str)[2:-1], alpha=0.7)

axhline(default_max_bot_edit_rate_per_minute, ls='--', c="red", label="Default max bot edit rate")
plot(minutes_after_first_bot_edit, other_bots_edit_rates, label="Other bots (accumulated)", alpha=0.7)

edit_rate_arr = [user_edit_rates.get(utc_min, 0) for utc_min in plotted_minutes]
plot(minutes_after_first_bot_edit, edit_rate_arr, c='k', label="User edits", lw=1.5, alpha=0.7)


xlabel('Elapsed minutes')
ylabel('Edit rate [edits/min]')
# rcend is the older bound, so it comes first in the title.
title("Edit rates on Wikimedia Commons between %s and %s" % (rcend, rcstart))
legend()
grid()
show()

Licensing edit

I, the copyright holder of this work, hereby publish it under the following license:
w:en:Creative Commons
attribution share alike
This file is licensed under the Creative Commons Attribution-Share Alike 3.0 Unported license.
You are free:
  • to share – to copy, distribute and transmit the work
  • to remix – to adapt the work
Under the following conditions:
  • attribution – You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
  • share alike – If you remix, transform, or build upon the material, you must distribute your contributions under the same or compatible license as the original.

File history

Click on a date/time to view the file as it appeared at that time.

Date/TimeThumbnailDimensionsUserComment
current21:50, 24 July 2012Thumbnail for version as of 21:50, 24 July 20122,160 × 1,106 (123 KB)Slaunger (talk | contribs)

The following page uses this file: