Python Examples

The following example scripts require that cx_Oracle has been installed. The search examples require that test data has been loaded, and domain indexes have been built.

An example molecule loader:

#!/usr/bin/env python

from openeye.oechem import *
import os
import sys
import cx_Oracle
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None

def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def LoadMolecules(ifs,password):
    """Create the ``test`` table, fill it from ifs and build the domain index.

    ifs      -- molecule input stream; iterated with GetOEGraphMols().
    password -- password for the ARNACHM1_TEST Oracle account.

    Every molecule is inserted as its title plus the SMILES returned by
    OECreateIsoSmiString().  The transaction is committed every 1000 rows
    and once more after the last row.  A cx_Oracle.DatabaseError is reported
    through HandleException() instead of being propagated.
    """
    conn = None
    cursor = None

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)

        # First create the test table.
        cursor = conn.cursor()
        cursor.execute("create table test (title varchar2(100), smiles varchar2(4000) not null)")
        cursor.close()
        # Drop the reference so the finally clause cannot close it again.
        cursor = None
        print("Created test table")

        # Next load the molecules.
        cursor = conn.cursor()
        sql = "insert into test(title, smiles) values (:title, :smiles)"

        count = 0
        for mol in ifs.GetOEGraphMols():
            title = mol.GetTitle()
            ism = OECreateIsoSmiString(mol)
            cursor.execute(sql, title=title, smiles=ism)
            count += 1
            if count % 1000 == 0:
                conn.commit()
                sys.stdout.write("Loaded %s molecules (so far)\r" % count)
                sys.stdout.flush()

        print("Loaded %s molecules (complete)" % count)

        cursor.close()
        # As above: a failing commit must not lead to a second close().
        cursor = None
        conn.commit()

        # Finally create the domain index.
        cursor = conn.cursor()
        cursor.execute(
            "create index test_structure_idx \
            on test(smiles) \
            indextype is c$arnachm1.structureIndexType")
        print("Created index test_structure_idx")

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


# Command-line definition handed to OEInterface() in main(): a required input
# file name (-i / -in, declared !KEYLESS 1) and an optional Oracle password
# (-p).
InterfaceData = """
!BRIEF [-i] <input> [-p password]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -p 2
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
!END
"""

def main(argv=[__name__]):
    """Parse the command line, obtain the password and load the molecules.

    Exits with status 1 when ORACLE_SID is not set.  The password comes from
    -p when given, otherwise it is prompted for with getpass.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    ifs = oemolistream()
    filename = itf.GetString("-i")
    if not ifs.open(filename):
        OEThrow.Fatal("Unable to open %s for reading" % filename)

    LoadMolecules(ifs,password)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

An example of an exact match search:

#!/usr/bin/env python

from openeye.oechem import *
import cx_Oracle
import os
import sys
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None

def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def ExactMatchSearch(ifs,ofs,password,tableName,columnName):
    """Run an exact-match query for every molecule in ifs and write the hits.

    ifs        -- molecule input stream supplying the query molecules.
    ofs        -- molecule output stream receiving the hits.
    password   -- password for the ARNACHM1_TEST Oracle account.
    tableName  -- table to search.
    columnName -- column of tableName to search and return.

    For a SMILES-format output stream each hit is written as a
    "<column value>\\t<title>" line; otherwise the column value is parsed as
    SMILES and written as a molecule titled with the row's title.  Both
    streams are closed before returning.  A cx_Oracle.DatabaseError is
    reported through HandleException() instead of being propagated.
    """

    hitCount = 0
    queryCount = 0
    outmol = OEGraphMol()
    conn = None
    cursor = None

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)
        cursor = conn.cursor()

        # Identifiers cannot be supplied as bind variables: binding the
        # column name would make Oracle treat it as a string value.  Table
        # and column names are therefore formatted into the statement; they
        # come from the command line and must be trusted.
        sql = ("select %s, title from %s "
               "where c$arnachm1.exactMatch(%s, :query, -1) = 1"
               % (columnName, tableName, columnName))

        for mol in ifs.GetOEGraphMols():
            queryCount += 1
            query = OEMolToSmiles(mol)

            cursor.execute(sql, query=query)

            while True:
                rows = cursor.fetchmany()
                if not rows:
                    break
                for data in rows:
                    hitCount += 1
                    if ofs.GetFormat() == OEFormat_SMI:  #write smiles
                        line = "%s\t%s\n" % (data[0],data[1])
                        ofs.write(line,len(line))
                    else:
                        smi = data[0]
                        title = str(data[1])
                        # Reset the scratch molecule before parsing a hit.
                        outmol.Clear()
                        OESmilesToMol(outmol,smi)
                        outmol.SetTitle(title)
                        OEWriteMolecule(ofs, outmol)

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

    print("Found %s exact matches for %s queries" % (hitCount, queryCount))
    ifs.close()
    ofs.close()


# Command-line definition handed to OEInterface() in main(): required input
# file (-i / -in), optional password (-p), output file (-o / -out), and the
# table (-table_name / -tname, default "test") and column (-column_name /
# -cname, default "smiles") to search.
InterfaceData = """
!BRIEF [-i] <input> [[-o] <output>] [-p password]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -p 2
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
!END
!PARAMETER -o 3
  !ALIAS -out
  !TYPE string
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -table_name 4
  !ALIAS -tname
  !TYPE string
  !DEFAULT test
  !REQUIRED false
  !BRIEF name of Oracle table to be searched
!END
!PARAMETER -column_name 5
  !ALIAS -cname
  !TYPE string
  !DEFAULT smiles
  !REQUIRED false
  !BRIEF name of Oracle column to be searched
!END
"""


def main(argv=[__name__]):
    """Parse the command line, open both streams and run the exact search.

    Exits with status 1 when ORACLE_SID is not set.  The password comes from
    -p when given, otherwise it is prompted for with getpass.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    ifs = oemolistream()
    filename = itf.GetString("-i")
    if not ifs.open(filename):
        OEThrow.Fatal("Unable to open %s for reading" % filename)

    ofs = oemolostream()
    filename = itf.GetString("-o")
    if not ofs.open(filename):
        OEThrow.Fatal("Unable to open %s for writing" % filename)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    tableName = itf.GetString('-tname')
    columnName = itf.GetString('-cname')

    ExactMatchSearch(ifs,ofs,password,tableName,columnName)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

An example of a similarity search:

#!/usr/bin/env python

from openeye.oechem import *
import os
import sys
import cx_Oracle
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None

def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def SimilaritySearch(ifs, cutoff, password, tableName = 'test'):
    """Run a similarity query per input molecule, one SDF file per query.

    ifs       -- molecule input stream supplying the query molecules.
    cutoff    -- value bound to the :cutoff parameter of the query.
    password  -- password for the ARNACHM1_TEST Oracle account.
    tableName -- table to search (formatted into the SQL text).

    Hits for the n-th query are written to "sim-hits-NNN.sdf" in the current
    working directory, each tagged with a "SIM" SD data field holding the
    score to two decimals.  A cx_Oracle.DatabaseError is reported through
    HandleException() instead of being propagated.
    """
    conn = None
    cursor = None

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)
        cursor = conn.cursor()

        sql = "select smiles, title, c$arnachm1.similarityScore(1) from %s \
                where c$arnachm1.similarity(smiles, :query, :cutoff, -1, 1) = 1" % tableName


        count = 1
        work_dir = os.getcwd()
        outmol = OEGraphMol()

        for mol in ifs.GetOEGraphMols():
            query = OEMolToSmiles(mol)
            cursor.execute(sql, query=query, cutoff=cutoff)
            output_filename = os.path.join(work_dir,
                                           "sim-hits-%03d.sdf" % count)
            oms = oemolostream(output_filename)
            hit_count = 0

            # The per-query output file is closed even if fetching fails.
            try:
                while True:
                    rows = cursor.fetchmany()
                    if not rows:
                        break
                    for data in rows:
                        smi = data[0]
                        title = data[1]
                        if title is None:
                            # Untitled rows get a positional placeholder.
                            title = "Hit %i" % (hit_count+1)
                        sim = data[2]
                        outmol.Clear()
                        OESmilesToMol(outmol,smi)
                        outmol.SetTitle(title)
                        OESetSDData(outmol, "SIM", "%.2f" % sim)
                        OEWriteMolecule(oms, outmol)
                        hit_count += 1
            finally:
                oms.close()

            print("Found %s molecules with sim >= %s for query %s output to %s"
                  % (hit_count, cutoff, query, output_filename))
            count += 1

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


# Command-line definition handed to OEInterface() in main(): required input
# file (-i / -in), required similarity cutoff of type double (-c / -cutoff)
# and an optional Oracle password (-p).
InterfaceData = """
!BRIEF [-i] <input> [-c] <cutoff> [-p password]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -c 2
  !ALIAS -cutoff
  !TYPE double
  !REQUIRED true
  !BRIEF Tanimoto similarity cutoff
  !KEYLESS 2
!END
!PARAMETER -p 3
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
  !KEYLESS 3
!END
"""


def main(argv=[__name__]):
    """Parse the command line, open the input and run the similarity search.

    Exits with status 1 when ORACLE_SID is not set.  The password comes from
    -p when given, otherwise it is prompted for with getpass.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    ifs = oemolistream()
    filename = itf.GetString("-i")
    if not ifs.open(filename):
        OEThrow.Fatal("Unable to open %s for reading" % filename)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    cutoff = itf.GetDouble('-c')

    SimilaritySearch(ifs,cutoff,password)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

A substructure search using a SMARTS query:

#!/usr/bin/env python

from openeye.oechem import *
import os
import sys
import cx_Oracle
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None


def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def SmartsSubstructureSearch(smarts,ofs,password,tableName='test'):
    """Run one SMARTS substructure query and write the hits to ofs.

    smarts    -- SMARTS pattern bound to the :smarts query parameter.
    ofs       -- molecule output stream; closed before returning.
    password  -- password for the ARNACHM1_TEST Oracle account.
    tableName -- table to search (formatted into the SQL text).

    For a SMILES-format output stream each hit is written as a
    "<smiles>\\t<title>" line; otherwise the SMILES is parsed and written as
    a molecule titled with the row's title.  A cx_Oracle.DatabaseError is
    reported through HandleException() instead of being propagated.
    """

    hitCount = 0
    outmol = OEGraphMol()
    conn = None
    cursor = None

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)
        cursor = conn.cursor()

        sql = "select smiles, title from %s \
               where c$arnachm1.substructure(smiles, :smarts, -1) = 1" \
               % tableName
        cursor.execute(sql, smarts=smarts)

        while True:
            rows = cursor.fetchmany()
            if not rows:
                break
            for data in rows:
                hitCount += 1
                if ofs.GetFormat() == OEFormat_SMI:  #write smiles
                    line = "%s\t%s\n" % (data[0],data[1])
                    ofs.write(line,len(line))
                else:
                    smi = data[0]
                    title = str(data[1])
                    # Reset the scratch molecule before parsing a hit.
                    outmol.Clear()
                    OESmilesToMol(outmol,smi)
                    outmol.SetTitle(title)
                    OEWriteMolecule(ofs, outmol)

        print("Found %s matches" % hitCount)

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
        ofs.close()


# Command-line definition handed to OEInterface() in main(): required SMARTS
# pattern (-s), output file (-o / -out) and an optional Oracle password (-p).
InterfaceData = """
!BRIEF [-s] <SMARTS> [[-o] <output>] [-p password]
!PARAMETER -s 1
  !TYPE string
  !REQUIRED true
  !BRIEF SMARTS pattern
  !KEYLESS 1
!END
!PARAMETER -o 2
  !ALIAS -out
  !TYPE string
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -p 3
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
!END
"""


def main(argv=[__name__]):
    """Parse the command line, open the output and run the SMARTS search.

    Exits with status 1 when ORACLE_SID is not set.  The password comes from
    -p when given, otherwise it is prompted for with getpass.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    ofs = oemolostream()
    filename = itf.GetString("-o")
    if not ofs.open(filename):
        OEThrow.Fatal("Unable to open %s for writing" % filename)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    smarts = itf.GetString("-s")

    SmartsSubstructureSearch(smarts,ofs,password)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

A substructure search using a MOL file query:

#!/usr/bin/env python

from openeye.oechem import *
import os
import sys
import cx_Oracle
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None

def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def MDLSubstructureSearch(mdl,ofs,password,tableName='test'):
    """Run one substructure query given as MOL-file text and write the hits.

    mdl       -- query text; sent to Oracle through a CLOB bind variable.
    ofs       -- molecule output stream; closed before returning.
    password  -- password for the ARNACHM1_TEST Oracle account.
    tableName -- table to search (formatted into the SQL text).

    For a SMILES-format output stream each hit is written as a
    "<smiles>\\t<title>" line; otherwise the SMILES is parsed and written as
    a molecule titled with the row's title.  A cx_Oracle.DatabaseError is
    reported through HandleException() instead of being propagated.
    """

    hitCount = 0
    outmol = OEGraphMol()
    conn = None
    cursor = None

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)
        cursor = conn.cursor()

        sql = "select smiles, title from %s where \
               c$arnachm1.substructure(smiles, :mdl_clob, -1) = 1" \
               % tableName
        # The query text is passed as a CLOB variable rather than a string.
        clob = cursor.var(cx_Oracle.CLOB)
        clob.setvalue(0, mdl)
        cursor.execute(sql, mdl_clob=clob)
        while True:
            rows = cursor.fetchmany()
            if not rows:
                break
            for data in rows:
                hitCount += 1
                if ofs.GetFormat() == OEFormat_SMI:  #write smiles
                    line = "%s\t%s\n" % (data[0],data[1])
                    ofs.write(line,len(line))
                else:
                    smi = data[0]
                    title = str(data[1])
                    outmol.Clear()
                    OESmilesToMol(outmol,smi)
                    outmol.SetTitle(title)
                    OEWriteMolecule(ofs, outmol)

        print("Found %s matches" % hitCount)

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
        ofs.close()


# Command-line definition handed to OEInterface() in main(): required input
# file (-i / -in), output file (-o / -out) and an optional Oracle password
# (-p).
InterfaceData = """
!BRIEF [-i] <input> [-o] <output> [-p password]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o 2
  !ALIAS -out
  !TYPE string
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -p 3
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
!END
"""


def main(argv=[__name__]):
    """Parse the command line, read the query file and run the search.

    Exits with status 1 when ORACLE_SID is not set.  The whole input file is
    read as text and passed on as the query.  The password comes from -p
    when given, otherwise it is prompted for with getpass.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    filename = itf.GetString("-i")
    # open() signals failure by raising, never by returning a false value,
    # so the error has to be caught to produce the intended message.
    try:
        with open(filename, 'r') as fp:
            mdl = fp.read()
    except IOError:
        OEThrow.Fatal("Unable to open %s for reading" % filename)

    ofs = oemolostream()
    filename = itf.GetString("-o")
    if not ofs.open(filename):
        OEThrow.Fatal("Unable to open %s for writing" % filename)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    MDLSubstructureSearch(mdl,ofs,password)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

An example of populating a blob column with conformers stored in an OEBinary array:

#!/usr/bin/env python

from openeye.oechem import *
import os,sys
import cx_Oracle
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None

def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def GetSingleResult(cursor):
    """Return the first column of the next row fetched from cursor.

    None is returned when fetchone() yields nothing or an empty row.
    """
    record = cursor.fetchone()
    if not record:
        return None

    # First element of the row, or None should the row turn out empty.
    return next(iter(record), None)


def LoadConformers(ifs,password,tableName,columnName):
    """Store every molecule read from ifs as an OEB blob in an existing row.

    ifs        -- molecule input stream read with OEReadMolecule().
    password   -- password for the ARNACHM1_TEST Oracle account.
    tableName  -- table whose rows are updated.
    columnName -- BLOB column that receives the OEB data.

    The row to update is the one whose title equals the molecule's title;
    molecules without such a row are reported on standard error and skipped.
    The transaction is committed every 1000 stored molecules and once more
    at the end.  A cx_Oracle.DatabaseError is reported through
    HandleException() instead of being propagated.
    """
    conn = None
    cursor = None

    # Identifiers cannot be bind variables, so table and column names are
    # formatted into the statements once, up front.  The RETURNING clause
    # must name the same column that was just set to empty_blob().
    select_sql = "select title from %s where title = :title" % tableName
    update_sql = ("update %s set %s = empty_blob() where "
                  "title = :title returning %s into :blobvar"
                  % (tableName, columnName, columnName))

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)

        cursor = conn.cursor()

        ofs = oemolostream()
        ofs.SetFormat(OEFormat_OEB)
        count = 0
        mol = OEMol()
        while OEReadMolecule(ifs,mol):

            title = mol.GetTitle()
            cursor.execute(select_sql, title=title)
            if GetSingleResult(cursor) is None:
                # Without a matching row the update returns no blob to
                # write into, so the molecule is skipped.
                sys.stderr.write("No row titled %s in %s; skipped\n"
                                 % (title, tableName))
                continue

            # Serialize the molecule to an in-memory OEB string.
            ofs.openstring()
            OEWriteMolecule(ofs,mol)
            ofs.close()

            blobvar = cursor.var(cx_Oracle.BLOB)
            cursor.execute(update_sql, title=title, blobvar=blobvar)

            blobvar.getvalue().write(ofs.GetString())

            count += 1
            if count % 1000 == 0:
                conn.commit()
                sys.stdout.write("Loaded %s molecules (so far)\r" % count)
                sys.stdout.flush()

        print("Loaded %s molecules (complete)" % count)

        conn.commit()

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


# Command-line definition handed to OEInterface() in main(): required input
# file (-i / -in), optional password (-p), and the table (-table_name /
# -tname, default "test") and column (-column_name / -cname, default "conf")
# passed on to LoadConformers().
InterfaceData = """
!BRIEF [-i] <input> [-p password]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -p 2
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
!END
!PARAMETER -table_name 3
  !ALIAS -tname
  !TYPE string
  !DEFAULT test
  !REQUIRED false
  !BRIEF name of Oracle table to be searched
!END
!PARAMETER -column_name 4
  !ALIAS -cname
  !TYPE string
  !DEFAULT conf
  !REQUIRED false
  !BRIEF name of Oracle column to be searched
!END
"""

def main(argv=[__name__]):
    """Parse the command line, set up the input stream and load conformers.

    Exits with status 1 when ORACLE_SID is not set.  The password comes from
    -p when given, otherwise it is prompted for with getpass.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    ifs = oemolistream()
    filename = itf.GetString("-i")
    if not ifs.open(filename):
        OEThrow.Fatal("Unable to open %s for reading" % filename)
    # Replace the stream's binary I/O handler configuration with the
    # rotor-compression and default OEB option sets.
    hand = ifs.GetBinaryIOHandler()
    hand.Clear()
    OEInitHandler(hand,OEBRotCompressOpts(),OEBDefaultOpts())

    tableName = itf.GetString('-tname')
    columnName = itf.GetString('-cname')

    LoadConformers(ifs,password,tableName,columnName)

if __name__ == "__main__":
    sys.exit(main(sys.argv))

A shape similarity search example:

#!/usr/bin/env python

from openeye.oechem import *
import os
import sys
import cx_Oracle
import getpass

# Oracle SID taken from the environment.  .get() is used instead of indexing
# so that an unset variable leaves ORACLE_SID as None (main() then reports
# it) rather than raising KeyError while the script is being loaded.  An
# empty value is treated the same as an unset one.
ORACLE_SID = os.environ.get('ORACLE_SID') or None

def HandleException(exc):
    """Write the code and message of an Oracle error to standard error.

    exc must carry exactly one argument: an error object exposing ``code``
    and ``message`` attributes.
    """
    error, = exc.args
    # sys.stderr.write() gives the same output as a print statement and is
    # valid on both Python 2 and Python 3.
    sys.stderr.write("Oracle-Error-Code: %s\n" % (error.code,))
    sys.stderr.write("Oracle-Error-Message: %s\n" % (error.message,))


def ShapeSearch(query,ofs,password,tableName,columnName,tanCutoff,maxHits):
    """Run one fastrocs shape query and append the hits to ofs.

    query      -- query molecule; sent as SDF text (through a CLOB) when
                  GetDimension() is 3, otherwise as the SMILES string from
                  OEMolToSmiles().
    ofs        -- molecule output stream.  It is left open: main() calls
                  this function once per query molecule with the same
                  stream, so closing it is the caller's job.
    password   -- password for the ARNACHM1_TEST Oracle account.
    tableName  -- table to search (formatted into the SQL text).
    columnName -- column to search (formatted into the SQL text).
    tanCutoff  -- cutoff, formatted into the SQL text with %f.
    maxHits    -- hit limit, formatted into the SQL text with %d.

    Each returned row is read as SDF text and written to ofs when it parses.
    A cx_Oracle.DatabaseError is reported through HandleException() instead
    of being propagated.
    """

    hitCount = 0
    outmol = OEMol()
    conn = None
    cursor = None

    dimension = query.GetDimension()
    if dimension == 3:
        # Serialize the 3D query to an in-memory SDF string.
        qfs = oemolostream()
        qfs.SetFormat(OEFormat_SDF)
        qfs.openstring()
        OEWriteMolecule(qfs,query)
        qfs.close()
        molstr = qfs.GetString()
    else:
        molstr = OEMolToSmiles(query)

    try:
        conn = cx_Oracle.connect("ARNACHM1_TEST", password, ORACLE_SID)
        cursor = conn.cursor()

        sql = "select c$arnachm1.fastrocs_conformation(1) from %s where \
              c$arnachm1.fastrocs(%s, :molstr, %f, %d, 1) = 1" \
              % (tableName,columnName,tanCutoff,maxHits)

        if dimension == 3:
            clob = cursor.var(cx_Oracle.CLOB)
            clob.setvalue(0, molstr)
            cursor.execute(sql, molstr=clob)
        else:
            cursor.execute(sql, molstr=molstr)

        while True:
            rows = cursor.fetchmany()
            if not rows:
                break
            for data in rows:
                hitCount += 1
                resfs = oemolistream()
                resfs.SetFormat(OEFormat_SDF)
                resfs.openstring(data[0].read())
                if OEReadMolecule(resfs,outmol):
                    OEWriteMolecule(ofs, outmol)

        print("Found %s matches" % hitCount)

    except cx_Oracle.DatabaseError as exc:
        HandleException(exc)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


# Command-line definition handed to OEInterface() in main(): required input
# and output files, optional password, the table and column to search, the
# hit limit (-max_hits) and the Tanimoto cutoff (-tanimoto_cutoff / -tcut).
InterfaceData = """
!BRIEF [-i] <input> [-o] <output> [-p password] [-max_hits max_hits] [-tname table_name] [-cname column_name] [-tcut tanimoto_cutoff]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
!END
!PARAMETER -o 2
  !ALIAS -out
  !TYPE string
  !REQUIRED true
  !BRIEF Output file name
!END
!PARAMETER -p 3
  !TYPE string
  !REQUIRED false
  !BRIEF oracle password
!END
!PARAMETER -table_name 4
  !ALIAS -tname
  !TYPE string
  !DEFAULT test
  !REQUIRED false
  !BRIEF name of Oracle table to be searched
!END
!PARAMETER -column_name 5
  !ALIAS -cname
  !TYPE string
  !DEFAULT conf
  !REQUIRED false
  !BRIEF name of Oracle column to be searched
!END
!PARAMETER -max_hits 6
  !TYPE int
  !REQUIRED false
  !DEFAULT 1000
  !BRIEF maximum number of hits to return
!END
!PARAMETER -tanimoto_cutoff 7
  !ALIAS -tcut
  !TYPE float
  !DEFAULT 1.2
  !REQUIRED false
  !BRIEF Tanimoto cutoff applied to shape similarity hits
!END
"""


def main(argv=[__name__]):
    """Parse the command line and run a shape search per query molecule.

    Exits with status 1 when ORACLE_SID is not set.  The password comes from
    -p when given, otherwise it is prompted for with getpass.  All hits from
    all query molecules go to the single output file.
    """

    if not ORACLE_SID:
        # Adjacent literals (not a backslash inside the string) keep source
        # indentation out of the message.
        sys.stderr.write("ORACLE_SID not defined.  Please set "
                         "the ORACLE_SID environment variable\n")
        sys.exit(1)

    itf = OEInterface(InterfaceData, argv)

    ofs = oemolostream()
    filename = itf.GetString("-o")
    if not ofs.open(filename):
        OEThrow.Fatal("Unable to open %s for writing" % filename)

    if itf.HasString('-p'):
        password = itf.GetString('-p')
    else:
        password = getpass.getpass()

    filename = itf.GetString("-i")
    ifs = oemolistream()
    if not ifs.open(filename):
        OEThrow.Fatal("Unable to open %s for reading" % filename)

    tableName = itf.GetString('-tname')
    columnName = itf.GetString('-cname')
    maxHits = itf.GetInt('-max_hits')
    tanCutoff = itf.GetFloat('-tcut')

    mol = OEGraphMol()
    while OEReadMolecule(ifs,mol):
        # ShapeSearch() takes the cutoff before the hit limit.
        ShapeSearch(mol,ofs,password,tableName,columnName,tanCutoff,maxHits)

    ifs.close()
    ofs.close()


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Previous topic

Example Code

Next topic

Java Examples