Usuário(a):GoEThe/Bloqueio.py
Aspeto
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
"""
Wikipedia:Discussão de bloqueio - Robô para lista novos pedidos de desbloqueio automaticamente.
Este robô irá apanhar todos os novos pedidos da categoria
especificada e adicioná-los às páginas determinadas
substituindo a região especificada automaticamente.
Também fará uma lista dos casos abertos da mesma maneira.
Ajuste as variáveis para bater certo.
"""
import re, sys, string
sys.path.append('')
import wikipedia, catlib, config
#**************
#* Variáveis: *
#**************
# Description of 'terms' in the new and open dict()s:
# category - Category to draw list of new cases from.
# target - Target page to add list of new cases to.
# section -
# Section of target page to replace, which is delimited
# by <!-- BEGIN [section] --> <!-- END [section] -->
# titlemask -
# The title mask removes some portion of the title from
# the link list.
# exclude -
# Page exclusion regex. List the pages that you don't
# want to have included in the output list.
# Example:
# Template\:Medcab2$|Wikipedia\:Mediation Cabal\/Complaints$
# action - Update action text.
# New cases
new = dict()
new['category'] = '!Pedidos_de_desbloqueio'
new['target'] = 'Wikipédia:Pedidos a administradores/Discussão de bloqueio'
new['section'] = 'NewCases'
new['titlemask'] = r"^Wikipédia\:Pedidos a administradores\/Discussão de bloqueio\/"
new['exclude'] = r"^Template\:.*|^User:.*"
new['action'] = "A actualizar pedidos novos..."
# Open cases
open = dict()
open['category'] = '!2'
open['target'] = 'Wikipédia:Pedidos a administradores/Discussão de bloqueio'
open['section'] = 'OpenCases'
open['titlemask'] = r"^Wikipédia\:Pedidos a administradores\/Discussão de bloqueio\/"
open['exclude'] = r"^Template\:.*|^User:.*"
open['action'] = "A actualizar pedidos abertos"
# Description of 'terms' in the status dict():
# tmpl - Name of the status template.
status = dict()
status['tmpl'] = r'Mediação'
# This should be run infrequently, using relatively short delays for the processing.
wikipedia.get_throttle.setDelay(5) # 5 seconds
wikipedia.put_throttle.setDelay(5) # 5 seconds
# *******************
# * MedCabBot Class *
# *******************
class MedCabBot:
def __init__(self):
pass
def run(self):
if new['target'] == open['target']:
wikipedia.output(u'Processing Cases Lists')
page_target = wikipedia.Page(wikipedia.getSite(), new['target'])
page_data = page_target.get()
new_page_data = self.process_category(new, page_data)
open_page_data = self.process_category(open, new_page_data)
# Check if the page has changed at all.
if new_page_data != page_data or open_page_data != new_page_data:
# If it has, update.
action = u""
if page_data != new_page_data:
if new_page_data != open_page_data:
action = new['action'] + ' & ' + open['action']
else:
action = new['action']
else:
action = open['action']
wikipedia.output(u'Updating Cases Lists')
wikipedia.setAction(action)
page_target.put(open_page_data)
else:
# Otherwise, tell the user and exit.
wikipedia.output(u'Cases Lists are already up-to-date')
else:
wikipedia.output(u'Processing New Cases List')
page_target = wikipedia.Page(wikipedia.getSite(), new['target'])
page_data = page_target.get()
new_page_data = self.process_category(new, page_data)
if new_page_data != page_data:
wikipedia.output(u'Updating New Cases List')
wikipedia.setAction(new['action'])
page_target.put(new_page_data)
else:
# Otherwise, tell the user and exit.
wikipedia.output(u'New Cases List is already up-to-date')
wikipedia.output(u'Processing Open Cases List')
page_target = wikipedia.Page(wikipedia.getSite(), open['target'])
page_data = page_target.get()
open_page_data = self.process_category(open, page_data)
if open_page_data != page_data:
wikipedia.output(u'Updating Open Cases List')
wikipedia.setAction(open['action'])
page_target.put(open_page_data)
else:
# Otherwise, tell the user and exit.
wikipedia.output(u'Open Cases List is already up-to-date')
def process_category(self, pgt, page_data):
# Populate local variables
category = pgt['category']
section = pgt['section']
titlemask = pgt['titlemask']
exclude = pgt['exclude']
# Setup Regular Expressions used later.
exclude_regex = re.compile(exclude)
titlemask_regex = re.compile(titlemask)
# Create instance of catlib object and specify category.
cat = catlib.Category(wikipedia.getSite(), 'Categoria:' + category)
# Get array of pages in category.
pages = cat.articles()
#pages.reverse() # Change to descending date order
# Initialize variables.
total = 2
count = 0
# Check if there are any pages in the category.
if total == 0:
# If the number of pages is zero output status.
wikipedia.output('Categoria:' + category + ' está vazia, não está a fazer nada.')
return page_data
else:
# Initialize variables.
pagelist = u"\n"
# Otherwise, process the pages to produce a page.
wikipedia.output(u'A processar ' + str(total) + ' páginas de pedidos de desbloqueio.')
# Loop through all pages.
for page in pages:
title = page.title()
count = count + 1
# Check to see whether it's in the exclude list.
if exclude_regex.match(title):
wikipedia.output(str(count) + u' of ' + str(total) + ' ' + title + ' - Skipping')
else:
# If not in the exclude list, add to the pagelist.
# Output status line.
wikipedia.output(str(count) + u' of ' + str(total) + ' ' + title)
# Add the page title to the page list.
pagelist = pagelist + u'* [[' + title + '|' + titlemask_regex.sub('', title) + ']]'
hist = page.getVersionHistory()
print hist
# Get dict of parameters from status template.
params = self.get_tmpl_params(page, status['tmpl'])
if params:
if 'mediators' in params and params['mediators'] != '':
pagelist = pagelist + u' — Mediator(s): ' + params['mediators'] + "\n"
else:
pagelist = pagelist + u"\n"
if 'comment' in params:
if params['comment'] != '':
pagelist = pagelist + u'** Comment: ' + params['comment'] + "\n"
else:
pagelist = pagelist + u"\n"
# Finish the formatting of the pagelist.
pagelist = u'<!-- BEGIN ' + section + ' -->' + pagelist + '<!-- END ' + section + ' -->'
# Setup regex to find replaced region.
start = r'\<\!\-\- BEGIN ' + section + ' \-\-\>'
end = r'\<\!\-\- END ' + section + ' \-\-\>'
# Run replacement and place in new variable.
return re.compile(start + r'.*?' + end, re.S).sub(pagelist, page_data)
# WARNING: get_tmpl_params() is really scary.
# If there's an efficient regex for parsing out templates, I'd love to have it.
def get_tmpl_params(self, page, tmpl_name):
# Compile regexes.
tmpl_open = re.compile(r'\{\{', re.I | re.S)
tmpl_close = re.compile(r'\}\}', re.I | re.S)
# Get the case page data.
page_data = page.get()
# Strip Comments
page_data = re.sub(r'\<\!\-\-.*?\-\-\>', '', page_data)
# Find start of string.
m = re.compile(r'\{\{' + tmpl_name + '\W*?\|', re.I | re.S).search(page_data)
# Only do processing if the search was successful.
if m:
# Set the start point for the parameter list.
param_start = m.end()
# Set the end point for the parameter list, which will iterate up if
# subtemplates are found within the template definition.
param_end = tmpl_close.search(page_data, param_start).end()
# Set the temporary search results variable for the next template
# opening delimiter.
m = tmpl_open.search(page_data, param_start)
# Since this could fail, verify that this result can be compared.
if m:
# Set the param_open variable to the last found template
# opening delimiter.
param_open = m.end()
# While the end point for the parameter range is greater than
# the end point of the last search for a template opening
# delimiter we know that there is a subtemplate to identify.
# This assumes that the templates are properly nested.
while param_open < param_end:
# Set the temporary search results variable to the next
# template opening delimiter.
m = tmpl_open.search(page_data, param_end)
# Logic to set the param_open variable.
if m:
# Search was successful, set to end() value.
param_open = m.end()
else:
# Search was failure, exit loop.
param_end = tmpl_close.search(page_data, param_end).end()
break
# Sets the new end point for the parameter range.
param_end = tmpl_close.search(page_data, param_end).end()
# Remove the closing template delimiter.
param_end = param_end - 2
# Declare parameters dict()
params = dict()
# Loop through each parameter.
for param in re.split(r"\n[\|]*",page_data[param_start:param_end]):
# Only try splitting and adding to the params if not blank.
if param != '':
# Split only on the first equal sign.
temp = param.split('=', 1)
# Add entry for this parameter.
if len(temp) > 1:
params[temp[0].strip()] = temp[1].strip()
# Debugging output.
print params
# Return the dict()
return params
else:
# Failed, return the results of the failed match()
return m
if __name__ == "__main__":
try:
bot = MedCabBot()
bot.run()
finally:
wikipedia.stopme()