python and oracle

This is a short guide to try and give you an idea as to how to install-and Oracle. Specifically to install the-add-on CX-Oracle.

Oracle Packages needed

We need to get some packages from Oracle - this requires

  • instantclient-basic-linux
  • instantclient-sdk-linux

Ubuntu Packages

sudo apt-get install build-essential unzip python-dev libaio-dev

Getting Started

Unzip the content in the same location, so you'll end up with a folder named: instantclient_11_2 (in my case) which will contain a bunch of .so and jar files.

just for ease of use I'll use $ORACLE_HOME which will basically point to the location where you unzipped your installclient folders.

export ORACLE_HOME=$(pwd)/instantclient_11_2
cd $ORACLE_HOME
ln -s libclntsh.so.11.1   libclntsh.so  #the version number on your .so file might be different

Update your /etc/profile or your ~/.bashrc

export ORACLE_HOME=/location/of/your/files/instantclient_11_2
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME

Conf Update

 vi /etc/ld.so.conf.d/oracle.conf

This is a new file, simple add the location of your .so files here, then update the ldpath using

Python Time

Finaly just install cx_oracle module:

pip install cx_oracle

That then should be it !!

COde Example

Extract Gender

#------------------------------------------------------------------------------
# ReturnUnicode.py
#   Returns all strings as unicode. This also demonstrates the use of an output
# type handler to change the way in which data is returned from a cursor.
#------------------------------------------------------------------------------

import cx_Oracle
import pprint

def OutputTypeHandler(cursor, name, defaultType, size, precision, scale):
    if defaultType in (cx_Oracle.STRING, cx_Oracle.FIXED_CHAR):
        return cursor.var(unicode, size, cursor.arraysize)

connection = cx_Oracle.Connection("nrsext/nrsext@//172.28.5.1/CivilDb")
cursor = connection.cursor()
cursor.execute("select * from SEX")
for row in cursor:
    #pprint.pprint(row)
    print("Data %s"%(row[0]))
    print("Data %s"%(row[1]))
    print("Data %s"%(row[2]))

Combine Tables and produce XML

This is quite complex, using a dictionary - connect to multiple Tables - and add to a Python Dictionary using a UNIQUE_KEY.

import re
import cx_Oracle
import dicttoxml
import pprint
from xml.dom.minidom import parseString
import collections
import logging





def d2x(d):
    xstr="<Data>"
    for k in d:
        xstr+="\t<Person>"
        xstr+="\t\t<Id>"+str(k)+"</Id>"
        for k2 in d[k]:
            xstr+=str.format("\t\t\t<%s>%s</%s>"%(k2,d[k][k2],k2))
        xstr+="\t</Person>"
    xstr+="</Data>"
    return xstr



#--------------------------------------
class mysql2xml(object):
        ''' mysql2xml
            This package allows you to make multiple calls to database tables - and will group data based on a unique id.
            For example: If you have data with ID as the primary key in a Car, Plane and Boat set of tables....
            d=mysql2xml(usr='tim',db='Car_Plane_Boat')
            transport={}
            d.Read(transport,'Car_Table','ID')
            d.Read(transport,'Plane_Table','ID')
            d.Read(transport,'Boat_Table','ID')

            #The data is now all in a nested dictionary.... if you want an XML file then
            xml_str=d.ToXML(d)
        '''

        def __init__(self,usr,db,pwd='',host='',debug=0):
                ''' __init__ needs usr and db.
                    then optionally it can be given pwd(pasword), host, and debug (0 or 1)
                '''

                self.cnx  = cx_Oracle.Connection("nrsext/nrsext@//172.28.5.1/CivilDb")
                if debug==1:
                   print('Setting Debug')
                   logging.basicConfig(level=logging.DEBUG)
                   self.logger = logging.getLogger(__name__)
                else:
                   logging.basicConfig(level=logging.WARNING)
                   self.logger = logging.getLogger(__name__)
                self.logger.propagate = False


        def Read(self,dic_to_update,table,keyfield):
            ''' Read needs 3 parameters....
                dict - a dictionary that may or may not have data in it
                table - the table in your database
                keyfield - the name of the key field in your table

            A 'select * from <TABLE>' query is executed - and then keyfield is searched for in the columns found.
            the  keyfield value is used as the dictionary key - all columns are then added to a sub-dictionary (nested dictionary)
            '''

            cur = self.cnx.cursor()
            logging.info(str.format('Processing %s'%(table)))
            cur.execute(str.format('select * from %s '%(table)))
            res = cur.fetchall()
            index = self.GetPosition(cur, keyfield)
            if index != -1:
                for r in res:
                    key=r[index]
                    if key not in dic_to_update:
                       self.logger.debug("need to add key %s"%(key))
                       dic_to_update[key]={}
                    else:
                       self.logger.debug("Key %s exists"%(key))
                    for idx in range(len(r)):
                        if idx != index:
                            dic_to_update[key][cur.description[idx][0]]=r[idx]           
            else:
                logging.error("Warn: Key %s in table %s was not found"%(keyfield,table))
            return dic_to_update

        def GetPosition(self,cur, key):
            '''GetPosition: Internal function used to find the location index of a column.
            returns -1 if the column key can not be found.
            '''
            columns = cur.description
            for idx in range(len(columns)):
                if columns[idx][0] == key:
                    return idx
            return -1

        def ToXml(self,d):
            '''ToXml:
            d - a Dictionary - nested or not

            returns a string representing a valid XML file '''

            #logging.basicConfig(level=logging.CRITICAL) 
            #self.logger= logging.getLogger(__name__)
            #xml = dicttoxml.dicttoxml(d, custom_root='Data_Load')
            xml = dicttoxml.dicttoxml(d)
            dom = parseString(xml)
            return dom.toprettyxml()


if __name__ == "__main__":
        Person={}
        d=mysql2xml(usr='tim',db='WRC',debug=1)
        d.Read(Person,'CARD','CRN')
        d.Read(Person,'LABOUR_CARD','CIVIL_NUMBER')
        d.Read(Person,'PERSON','CIVIL_NUMBER')
        d.Read(Person,'PERSON_ADDRESS','CIVIL_NUMBER')
        d.Read(Person,'PERSON_PASSPORT','CIVIL_NUMBER')
        d.Read(Person,'PERSON_VISA','CIVIL_NUMBER')
        d.Read(Person,'FOUNDLING','CIVIL_NUMBER')
        d.Read(Person,'HISTORY_NAME','CIVIL_NUMBER')
        d.Read(Person,'ID_CARD','CIVIL_NUMBER')
        d.Read(Person,'DEATH','CIVIL_NUMBER')
        #As XML
        ofp=open('test.xml','w+')
        #pprint.pprint(Person)
        ofp.write("%s\n"%(d2x(Person)))
        ofp.close()
        #as pprint
        #pprint.pprint(Person)

This produces XML similar to this...