# User:Edoderoobot/WD-taxon-description.py
#
# From Wikidata
import pywikibot
from pywikibot import pagegenerators
#import pywikibot.data.wikidataquery as wdquery
from pywikibot import pagegenerators as pg

import codecs #used in logfiles, unicoded strings
import sys
import datetime
from datetime import datetime, date, time

# Existing descriptions (per language) that the bot is allowed to overwrite.
replacedesc={'nl':['taxon','',]}

# Maps an English Wikidata description to its translation per language.
# Keys must match the English description exactly (singular/plural variants
# are listed separately because both occur on Wikidata).
#
# Corrected translations compared to the earlier table:
#   - 'species of echinoderm'  : was 'paddenstoelen' (mushrooms), now 'stekelhuidigen'
#   - 'species of crustacean'  : typo 'kreefachtigen', now 'kreeftachtigen'
#   - 'order of sea spiders'   : missing 'van'
#   - '... of arthropods'      : was 'duizendpoten' (centipedes), now 'geleedpotigen'
#   - 'species of arachnid'    : was 'spinnen' (spiders only), now 'spinachtigen'
taxondescs={
            'family of alga'            :{'nl':u'taxon, familie van algen'},
            'family of algae'           :{'nl':u'taxon, familie van algachtigen'},
            'family of arachnids'       :{'nl':u'taxon, familie van spinachtigen'},
            'family of birds'           :{'nl':u'taxon, familie van vogels'},
            'family of brachiopods'     :{'nl':u'taxon, familie van armpotigen'},
            'family of bryozoans'       :{'nl':u'taxon, familie van mosdiertjes'},
            'family of cnidarians'      :{'nl':u'taxon, familie van neteldieren'},
            'family of crustaceans'     :{'nl':u'taxon, familie van kreeftachtigen'},
            'family of echinoderms'     :{'nl':u'taxon, familie van stekelhuidigen'},
            'family of fishes'          :{'nl':u'taxon, familie van vissen'},
            'family of fungi'           :{'nl':u'taxon, familie van schimmels'},
            'family of gastrotrichs'    :{'nl':u'taxon, familie van buikharigen'},
            'family of insects'         :{'nl':u'taxon, familie van insecten'},
            'family of mammals'         :{'nl':u'taxon, familie van zoogdieren'},
            'family of molluscs'        :{'nl':u'taxon, familie van weekdieren'},
            'family of plants'          :{'nl':u'taxon, familie van planten'},
            'family of prokaryotes'     :{'nl':u'taxon, familie van prokaryoten'},
            'family of reptiles'        :{'nl':u'taxon, familie van reptielen'},
            'family of sea spiders'     :{'nl':u'taxon, familie van zeespinnen'},
            'family of waterbears'      :{'nl':u'taxon, familie van beerdiertjes'},
            'family of worms'           :{'nl':u'taxon, familie van wormen'},

            'genus of alga'             :{'nl':u'taxon, geslacht van algen'},
            'genus of algae'            :{'nl':u'taxon, geslacht van algachtigen'},
            'genus of amphibians'       :{'nl':u'taxon, geslacht van amfibieën'},
            'genus of arachnids'        :{'nl':u'taxon, geslacht van spinachtigen'},
            'genus of arthropods'       :{'nl':u'taxon, geslacht van geleedpotigen'},
            'genus of birds'            :{'nl':u'taxon, geslacht van vogels'},
            'genus of brachiopods'      :{'nl':u'taxon, geslacht van armpotigen'},
            'genus of bryozoans'        :{'nl':u'taxon, geslacht van mosdiertjes'},
            'genus of cnidarian'        :{'nl':u'taxon, geslacht van neteldieren'},
            'genus of cnidarians'       :{'nl':u'taxon, geslacht van neteldieren'},
            'genus of crustaceans'      :{'nl':u'taxon, geslacht van kreeftachtigen'},
            'genus of echinoderms'      :{'nl':u'taxon, geslacht van stekelhuidigen'},
            'genus of fishes'           :{'nl':u'taxon, geslacht van vissen'},
            'genus of fungi'            :{'nl':u'taxon, geslacht van schimmels'},
            'genus of gastrotrichs'     :{'nl':u'taxon, geslacht van buikharigen'},
            'genus of insects'          :{'nl':u'taxon, geslacht van insecten'},
            'genus of mammals'          :{'nl':u'taxon, geslacht van zoogdieren'},
            'genus of molluscs'         :{'nl':u'taxon, geslacht van weekdieren'},
            'genus of myriapods'        :{'nl':u'taxon, geslacht van duizendpotigen'},
            'genus of plants'           :{'nl':u'taxon, geslacht van planten'},
            'genus of prokaryotes'      :{'nl':u'taxon, geslacht van prokaryoten'},
            'genus of reptiles'         :{'nl':u'taxon, geslacht van reptielen'},
            'genus of sea spiders'      :{'nl':u'taxon, geslacht van zeespinnen'},
            'genus of sponges'          :{'nl':u'taxon, geslacht van sponsachtigen'},
            'genus of trilobites'       :{'nl':u'taxon, geslacht van drielobbigen'},
            'genus of viruses'          :{'nl':u'taxon, geslacht van virussen'},
            'genus of waterbears'       :{'nl':u'taxon, geslacht van beerdiertjes'},
            'genus of worms'            :{'nl':u'taxon, geslacht van wormen'},

            'nothospecies of plant'     :{'nl':u'taxon, nothospecies van planten'},

            'order of alga'             :{'nl':u'taxon, orde van algen'},
            'order of algae'            :{'nl':u'taxon, orde van algachtigen'},
            'order of amphibians'       :{'nl':u'taxon, orde van amfibieën'},
            'order of arachnids'        :{'nl':u'taxon, orde van spinachtigen'},
            'order of birds'            :{'nl':u'taxon, orde van vogels'},
            'order of brachiopods'      :{'nl':u'taxon, orde van armpotigen'},
            'order of cnidarian'        :{'nl':u'taxon, orde van neteldieren'},
            'order of crustaceans'      :{'nl':u'taxon, orde van kreeftachtigen'},
            'order of fishes'           :{'nl':u'taxon, orde van vissen'},
            'order of fungi'            :{'nl':u'taxon, orde van schimmels'},
            'order of gastrotrichs'     :{'nl':u'taxon, orde van buikharigen'},
            'order of insects'          :{'nl':u'taxon, orde van insecten'},
            'order of mammals'          :{'nl':u'taxon, orde van zoogdieren'},
            'order of molluscs'         :{'nl':u'taxon, orde van weekdieren'},
            'order of plants'           :{'nl':u'taxon, orde van planten'},
            'order of prokaryotes'      :{'nl':u'taxon, orde van prokaryoten'},
            'order of reptiles'         :{'nl':u'taxon, orde van reptielen'},
            'order of sea spiders'      :{'nl':u'taxon, orde van zeespinnen'},
            'order of waterbears'       :{'nl':u'taxon, orde van beerdiertjes'},
            'order of worm'             :{'nl':u'taxon, orde van wormen'},

            'tribe of arachnids'        :{'nl':u'taxon, geslachtengroep van spinachtigen'},
            'tribe of insects'          :{'nl':u'taxon, geslachtengroep van insecten'},
            'tribe of mammals'          :{'nl':u'taxon, geslachtengroep van zoogdieren'},
            'tribe of plants'           :{'nl':u'taxon, geslachtengroep van planten'},
            'tribe of reptiles'         :{'nl':u'taxon, geslachtengroep van reptielen'},

            'section of plants'         : {'nl':u'taxon, sectie van planten'},

            'series of plants'          : {'nl':u'taxon, reeks van planten'},

            'species of alga'           :{'nl':u'taxon, soort van algen'},
            'species of annelid'        :{'nl':u'taxon, soort van ringwormen'},
            'species of amphibian'      :{'nl':u'taxon, soort van amfibieën'},
            'species of arachnid'       :{'nl':u'taxon, soort van spinachtigen'},
            'species of arachnids'      :{'nl':u'taxon, soort van spinachtigen'},
            'species of arthropods'     :{'nl':u'taxon, soort van geleedpotigen'},
            'species of bird'           :{'nl':u'taxon, soort van vogels'},
            'species of brachiopods'    :{'nl':u'taxon, soort van armpotigen'},
            'species of bryozoan'       :{'nl':u'taxon, soort van mosdiertjes'},
            'species of chordates'      :{'nl':u'taxon, soort van chordadieren'},
            'species of cnidarian'      :{'nl':u'taxon, soort van neteldieren'},
            'species of crustacean'     :{'nl':u'taxon, soort van kreeftachtigen'},
            'species of ctenophore'     :{'nl':u'taxon, soort van ribkwallen'},
            'species of echinoderm'     :{'nl':u'taxon, soort van stekelhuidigen'},
            'species of entoprocts'     :{'nl':u'taxon, soort van kelkwormen'},
            'species of gastrotrichs'   :{'nl':u'taxon, soort van buikharigen'},
            'species of insect'         :{'nl':u'taxon, soort van insecten'},
            'species of fungus'         :{'nl':u'taxon, soort van schimmels'},
            'species of fish'           :{'nl':u'taxon, soort van vissen'},
            'species of mammal'         :{'nl':u'taxon, soort van zoogdieren'},
            'species of mollusc'        :{'nl':u'taxon, soort van weekdieren'},
            'species of myriapod'       :{'nl':u'taxon, soort van duizendpotigen'},
            'species of plant'          :{'nl':u'taxon, soort van planten'},
            'species of prokaryote'     :{'nl':u'taxon, soort van prokaryoten'},
            'species of reptile'        :{'nl':u'taxon, soort van reptielen'},
            'species of rotifers'       :{'nl':u'taxon, soort van raderdieren'},
            'species of sea spiders'    :{'nl':u'taxon, soort van zeespinnen'},
            'species of sponge'         :{'nl':u'taxon, soort van sponsachtigen'},
            'species of waterbears'     :{'nl':u'taxon, soort van beerdiertjes'},
            'species of virus'          :{'nl':u'taxon, soort van virussen'},
            'species of worm'           :{'nl':u'taxon, soort van wormen'},

            'spider family'             :{'nl':u'taxon, familie van spinachtigen'},

            'subfamily of arachnids'    :{'nl':u'taxon, onderfamilie van spinachtigen'},
            'subfamily of birds'        :{'nl':u'taxon, onderfamilie van vogels'},
            'subfamily of crustaceans'  :{'nl':u'taxon, onderfamilie van kreeftachtigen'},
            'subfamily of fishes'       :{'nl':u'taxon, onderfamilie van vissen'},
            'subfamily of insects'      :{'nl':u'taxon, onderfamilie van insecten'},
            'subfamily of mammals'      :{'nl':u'taxon, onderfamilie van zoogdieren'},
            'subfamily of molluscs'     :{'nl':u'taxon, onderfamilie van weekdieren'},
            'subfamily of plants'       :{'nl':u'taxon, onderfamilie van planten'},
            'subfamily of reptiles'     :{'nl':u'taxon, onderfamilie van reptielen'},
            'bird'                      :{'nl':u'taxon, vogel'},

            'subgenus of insects'       :{'nl':u'taxon, ondergeslacht van insecten'},
            'subgenus of mammals'       :{'nl':u'taxon, ondergeslacht van zoogdieren'},
            'subgenus of plants'        :{'nl':u'taxon, ondergeslacht van planten'},

            'subtribe of insects'       :{'nl':u'taxon, ondertribus van insecten'},
            'subtribe of plants'        :{'nl':u'taxon, ondertribus van planten'},

            'superfamily of insects'    :{'nl':u'taxon, superfamilie van insecten'},
            'superfamily of molluscs'   :{'nl':u'taxon, superfamilie van weekdieren'},
            'superfamily of plants'     :{'nl':u'taxon, superfamilie van planten'},

            'variety of algae'          :{'nl':u'taxon, variëteit van algachtigen'},
            'variety of plants'         :{'nl':u'taxon, variëteit van planten'},


            # Placeholder entry kept from the original table; its key is not a
            # realistic English description.
            'x!y~z':{'nl':''},
           }

# Run-mode switches.
#   debugedo: when true, the script entry point processes one hard-coded
#             item instead of calling main().
#   debug:    when true, action_one_item() only prints the edit it would
#             make instead of saving it.
debugedo = True
debug = True

# Defaults read by main().
default_query = 'claim[31:16521]'  # all taxons
default_language = 'nl'

# Module-level state shared between main() and action_one_item().
items2do = 0       # decremented once per processed item
itemsdone = 0      # number of items handled so far
missing_dict = {}  # English description -> count, for descriptions without a translation


def log_premature(itemno):
    """Append *itemno* on its own line to ``taxon-description.prelog.csv``.

    The file is opened in append mode with UTF-8 encoding in the current
    working directory. The ``with`` statement closes the file, so no
    explicit close is needed.
    """
    with codecs.open("taxon-description.prelog.csv", "a", encoding="utf-8") as logfile:
        logfile.write('%s\n' % (itemno))
def logme(verbose, formatstring, *parameters):
    """Append a formatted line to ``taxon-description.log.csv``.

    Args:
        verbose: when true, the formatted message is also printed.
        formatstring: a ``%``-style format string.
        *parameters: values substituted into *formatstring*.

    If the message cannot be formatted or written, the problem is reported
    on stdout and the message is printed regardless of *verbose*. A
    formatting failure no longer escapes from the final print: the raw
    format string and parameters are logged instead.
    """
    # Format once, so the file and the console always show the same text.
    try:
        message = formatstring % (parameters)
    except (TypeError, ValueError, KeyError) as err:
        print("1) Error writing to logfile on: [%s] [%s]" % (type(err), err))
        # Fall back to an unformatted rendering so nothing is lost.
        message = u'%s %r' % (formatstring, parameters)
        verbose = True    # now I want to see what!

    with codecs.open("taxon-description.log.csv", "a", encoding="utf-8") as logfile:
        try:
            logfile.write(u'%s\n' % message)
        except (IOError, OSError, UnicodeError) as err:
            print("1) Error writing to logfile on: [%s] [%s]" % (type(err), err))
            verbose = True    # now I want to see what!

    if verbose:
        print(message)

def action_one_item(wditem):
    """Translate the English description of one Wikidata item.

    Looks up the item's English description in ``taxondescs``. For every
    language that has a translation, the item's existing description in that
    language is replaced when it is one of the values listed in
    ``replacedesc`` for that language. With ``debug`` enabled the edit is
    only printed, not saved.

    English descriptions without an entry in ``taxondescs`` are counted in
    ``missing_dict``.

    Args:
        wditem: the item to process; its ``descriptions``, ``title()`` and
            ``editEntity()`` are used.

    Returns:
        1 when the item has an English description, otherwise 0.
    """
    global items2do
    global itemsdone
    global missing_dict

    items2do -= 1
    # Progress line: items done, items still to do, current item title.
    done_col = '{:>10d}'.format(itemsdone)
    title_col = '{:>10}'.format(wditem.title())
    todo_col = '{:>10d}'.format(items2do)
    sys.stdout.write("\r%s%s%s" % (done_col, todo_col, title_col))

    descriptions = wditem.descriptions
    if 'en' not in descriptions:
        return 0

    en_desc = descriptions['en']
    if en_desc not in taxondescs:
        # Remember untranslated descriptions so the table can be extended.
        missing_dict[en_desc] = missing_dict.get(en_desc, 0) + 1
        return 1

    translations = taxondescs[en_desc]
    for lang in translations:
        # NOTE(review): an item with no description at all in `lang` is
        # skipped here, so the '' entry in replacedesc only matches an
        # explicitly empty description - confirm that this is intended.
        if lang not in descriptions:
            continue
        current = descriptions[lang]
        # Languages without a replace list are never overwritten.
        if current not in replacedesc.get(lang, ()):
            continue

        data = {'descriptions': {lang: translations[lang]}}
        if debug:
            print('Debug: %s' % data)
            continue

        try:
            log_premature(wditem.title())
            wditem.editEntity(data, summary=u'WD-taxon-description.py [[User:Edoderoobot/WD-taxon-description.py|source]]')
            logme(False, '%s|%s|%s|%s|%s|%s', datetime.now().strftime("%Y-%b-%d/%H:%M:%S"), wditem.title(), lang, current, translations[lang], 'taxon-descript')
        except Exception as err:
            # Best effort: one failed edit must not stop the run, but it
            # should be visible. KeyboardInterrupt is deliberately not caught.
            print('Error updating %s: [%s] [%s]' % (wditem.title(), type(err), err))

    return 1
        

def wd_sparql_generator(query):
    """Yield each page returned by the SPARQL *query* on Wikidata.

    Every page's content is fetched with ``get(get_redirect=True)`` before
    it is yielded.
    """
    site = pywikibot.Site('wikidata', 'wikidata')
    for page in pg.WikidataSPARQLPageGenerator(query, site=site):
        page.get(get_redirect=True)
        yield page

def wd_from_file():
    """Yield the Wikidata items listed in ``taxa4.csv``.

    On each line the item id is the text from the first ``Q`` up to the next
    comma, or to the end of the line when there is no comma. Lines without
    a ``Q`` are skipped, as are redirects and items that do not exist. Each
    yielded item has had its content fetched.

    The file is closed when the generator is exhausted or closed.
    """
    repo = pywikibot.Site('wikidata', 'wikidata').data_repository()
    with open('taxa4.csv', 'r') as csvfile:
        for line in csvfile:
            start = line.find('Q')
            if start < 0:
                continue
            # Search for the comma after the id; a missing comma means the
            # id runs to the end of the line (no last character is lost).
            end = line.find(',', start)
            qitem = (line[start:end] if end >= 0 else line[start:]).strip()
            if not qitem:
                continue
            wditem = pywikibot.ItemPage(repo, qitem)
            if wditem.isRedirectPage():
                continue
            if wditem.exists():
                wditem.get(get_redirect=True)
                yield wditem
    
        
def main():
    """Process every item returned by the taxon SPARQL query.

    Resets the global ``itemsdone`` counter, passes each item to
    ``action_one_item`` and prints the number of items handled.
    """
    global itemsdone
    itemsdone = 0

    print("main")
    # Not used yet: later these are meant to be managed through parameters.
    query = default_query
    lng = default_language

    sparql = u'select ?item where {?item wdt:P31 wd:Q16521}'
    # Alternative item source: wd_from_file()
    for wditem in wd_sparql_generator(sparql):
        action_one_item(wditem)
        itemsdone += 1
        # For a short trial run, stop here once itemsdone > 25.

    print('Items done: %s' % itemsdone)
    
  
# Script entry point: run the full job, or a single test item when the
# debugedo switch is on.
if __name__ == "__main__":
    if not debugedo:
        print("Klaar voor de start")
        main()
    else:
        print("debug is on")
        site = pywikibot.Site('nl')
        repo = site.data_repository()
        wd = pywikibot.ItemPage(repo, 'Q17979303')
        wd.get(get_redirect=True)
        action_one_item(wd)