Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Find monuments where a commons category exists, but no link is in the list yet.

Usage:
# loop through all countries
python missing_commonscat_links.py
# work on specific country-lang
python missing_commonscat_links.py -countrycode:XX -langcode:YY
"""
close_database_connection, connect_to_commons_database, connect_to_monuments_database )
"""Work on a single country.""" if not countryconfig.get('missingCommonscatPage'): # missingCommonscatPage not set, just skip silently. return { 'config': countryconfig, 'cmt': 'skipped: no missingCommonscatPage' } if not countryconfig.get('commonsTrackerCategory'): # commonsTrackerCategory not set, just skip silently. return { 'config': countryconfig, 'cmt': 'skipped: no commonsTrackerCategory' }
if countryconfig.get('type') == 'sparql': # This script does not (yet) work for SPARQL sources, skip silently return { 'config': countryconfig, 'cmt': 'skipped: cannot handle sparql' }
commonscatField = lookupSourceField('commonscat', countryconfig) if not commonscatField: # Field is missing. Something is seriously wrong, but we just skip it # silently return { 'config': countryconfig, 'cmt': 'skipped: no template field matched to commonscat!!' }
missingCommonscatPage = countryconfig.get('missingCommonscatPage') commonsTrackerCategory = countryconfig.get( 'commonsTrackerCategory'). replace(' ', '_')
withoutCommonscat = getMonumentsWithoutCommonscat( countryconfig.get('country'), countryconfig.get('lang'), conn, cursor) commonscats = getMonumentCommonscats( commonsTrackerCategory, conn2, cursor2)
pywikibot.log('withoutCommonscat {num} elements'.format( num=len(withoutCommonscat))) pywikibot.log('commonscats {num} elements'.format( num=len(withoutCommonscat)))
missing_commonscat = group_missing_commonscat_by_source( commonscats, withoutCommonscat, countryconfig)
site = pywikibot.Site(countryconfig.get('lang'), 'wikipedia') page = pywikibot.Page(site, missingCommonscatPage) iw_links = getInterwikisMissingCommonscatPage( countryconfig.get('country'), countryconfig.get('lang')) totals = output_country_report( missing_commonscat, commonscatField, page, iw_links)
return { 'report_page': page, 'config': countryconfig, 'total_cats': totals['cats'], 'total_pages': totals['pages'] }
iw_links=None, max_cats=1000): """ Format and output the missing commonscats data for a single country.
@param missing_commonscat: the output of group_missing_commonscat_by_source @param commonscat_field: the template field used for adding a commonscat @param report_page: pywikibot.Page to which the report should be written @param iw_links: any interwiki links to append to the page. @param max_cats: the max number of categories to report to a page. Defaults to 1000. Note that actual number of images may be slightly higher in order to ensure all entries in a given list are presented. """ # People can add a /header template with more info
else: '* <nowiki>|</nowiki> {field} = ' '[[:c:Category:{_name}|{name}]] - {id}\n'.format( field=commonscat_field, _name=cat_name, name=cat_name.replace('_', ' '), id=monument_id ) ) else:
'<!-- Maximum number of categories reached: {max}, ' 'total of missing commonscat links: {total} -->\n'.format( max=max_cats, total=totalCategories)) 'Commonscat links to be made in monument lists: ' '{max} (list maximum reached), ' 'total of missing commonscat links: {total}'.format( max=max_cats, total=totalCategories)) else: totalCategories)
'cats': totalCategories, 'pages': total_pages }
countryconfig): """Identify all missing commonscat links and group them by source page and id."""
catSortKey, withoutCommonscat) except ValueError: pywikibot.warning('Got value error for {0}'.format(catSortKey)) continue
withoutCommonscat.get(monumentId), countryconfig.get('type')) except ValueError: pywikibot.warning( 'Could not find source page for {0} ({1})'.format( monumentId, withoutCommonscat.get(monumentId))) continue
(commonscats.get(catSortKey), monumentId))
"""Lookup the source field of a destination.""" for field in countryconfig.get('fields'): if field.get('dest') == destination: return field.get('source')
"""Get interwiki link to missing_commonscat_page for the same country.""" lang=lang2, page=countryconfig.get('missingCommonscatPage'))
""" Retrieve all monuments in the database without commonscat.
@return dict of monuments without commonscat with id as key and source (list) as value. """ result = {}
query = ( "SELECT id, source " "FROM monuments_all " "WHERE (commonscat IS NULL or commonscat='') " "AND country=%s AND lang=%s")
cursor.execute(query, (countrycode, lang))
while True: try: row = cursor.fetchone() (id, source) = row # To uppercase, same happens in the other list result[id.upper()] = source except TypeError: break
return result
""" Retrieve all commons categories in the tracking category.
@return dict of commons categories with category_sort_key as key and category name as value. category_sort_key contains the monument id. """ query = ( "SELECT page_title, cl_sortkey_prefix " "FROM page " "JOIN categorylinks ON page_id=cl_from " "WHERE page_namespace=14 AND page_is_redirect=0 AND cl_to=%s")
cursor.execute(query, (commonsTrackerCategory,))
return common.process_sort_key_query_result(cursor)
"""Output the overall results of the bot as a nice wikitable.""" site, 'Commons:Monuments database/Missing commonscat links/Statistics')
('code', 'country'), ('lang', None), ('total', None), ('report_page', 'page'), ('row template', None), ('Commons template', None) ])
countryconfig.get('lang'), countryconfig.get('project', 'wikipedia'), countryconfig.get('rowTemplate'), site)
countryconfig.get('commonsTemplate'), )
as_link=True, with_ns=False, insite=site)
'code': countryconfig.get('country'), 'lang': countryconfig.get('lang'), 'total': total_cats_or_cmt, 'report_page': report_page, 'row template': row_template, 'Commons template': commons_template})
'Updating missing commonscat links statistics. ' 'Total missing links: {total_cats}'.format( total_cats=table.get_sum('total')))
def _process_requested_countries(conn, cursor, conn2, cursor2):
    """Parse the command line and process one country or all of them.

    @return False when a countrycode/langcode pair without config was
        requested, otherwise None.
    @raise Exception: on an unknown argument, or when only one of
        -countrycode / -langcode is given.
    """
    countrycode = ''
    lang = ''
    skip_wd = False

    for arg in pywikibot.handleArgs():
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == '-skip_wd':
            skip_wd = True
        else:
            raise Exception(
                'Bad parameters. Expected "-countrycode", "-langcode", '
                '"-skip_wd" or pywikibot args. '
                'Found "{}"'.format(option))

    if countrycode and lang:
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                'I have no config for countrycode "{code}" in language '
                '"{lang}"'.format(code=countrycode, lang=lang))
            return False
        pywikibot.log(
            'Working on countrycode "{code}" in language "{lang}"'.format(
                code=countrycode, lang=lang))
        processCountry(mconfig.countries.get((countrycode, lang)),
                       conn, cursor, conn2, cursor2)
    elif countrycode or lang:
        raise Exception('The "countrycode" and "langcode" arguments must '
                        'be used together.')
    else:
        statistics = []
        for (countrycode, lang), countryconfig in mconfig.filtered_countries(
                skip_wd=skip_wd):
            pywikibot.log(
                'Working on countrycode "{code}" in language "{lang}"'.format(
                    code=countrycode, lang=lang))
            try:
                statistics.append(processCountry(
                    countryconfig, conn, cursor, conn2, cursor2))
            except Exception as e:
                # One failing country must not abort the whole run; record
                # the failure so it shows up in the statistics instead.
                pywikibot.error(
                    'Unknown error occurred when processing country '
                    '{0} in lang {1}\n{2}'.format(countrycode, lang, str(e)))
                statistics.append({
                    'config': countryconfig,
                    'cmt': 'failed: unexpected error during processing'
                })
                continue
        makeStatistics(statistics)
    return None


def main():
    """Connect to both databases, run the bot and always close them again.

    @return False when the requested countrycode/langcode pair has no
        config, otherwise None.
    """
    # Connect database, we need that
    (conn, cursor) = connect_to_monuments_database()
    try:
        (conn2, cursor2) = connect_to_commons_database()
        try:
            return _process_requested_countries(conn, cursor, conn2, cursor2)
        finally:
            # The commons connection must be released as well, also on the
            # early return and when an exception propagates.
            close_database_connection(conn2, cursor2)
    finally:
        close_database_connection(conn, cursor)
# Only run the bot when executed as a script, not when imported.
if __name__ == "__main__":
    pywikibot.log('Start of %s' % __file__)
    main()