Appendix: Additional Examples in Python

These are full listings of programming examples that are excerpted or offered for download elsewhere in this chapter. See the full list of examples in this chapter.

Listing 1: Full listing of CustomColorForceField.py.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs
from openeye import oeshape

# Put the sibling "../python" directory (resolved relative to this script)
# at the front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database using a custom color force field.

    The force field is initialised from
    ``OEColorFFType_ImplicitMillsDeanNoRings``, its interactions are cleared,
    and donor-donor and acceptor-acceptor gaussian interactions are added
    back.  Each query file is searched against the database and every scored
    molecule is written to ``<query base>_results.<query extension>``.

    :param argv: command line as ``[program, database, query, ...]``
    :return: 0 (also when no supported GPU is available)
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()

    # Range of the two gaussian interactions added below; the last argument of
    # AddInteraction must be a number.
    # NOTE(review): 1.0 is assumed to be the intended range - confirm.
    interactionRange = 1.0

    cff = oeshape.OEColorForceField()
    cff.Init(oeshape.OEColorFFType_ImplicitMillsDeanNoRings)
    cff.ClearInteractions()
    cff.AddInteraction(
        cff.GetType("donor"), cff.GetType("donor"), "gaussian", -1.0, interactionRange
    )
    cff.AddInteraction(
        cff.GetType("acceptor"), cff.GetType("acceptor"), "gaussian", -1.0, interactionRange
    )
    dbase = oefastrocs.OEShapeDatabase(cff)
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    # The hit limit is the number of database molecules, so it is the same
    # for every query and only needs to be set once.
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(moldb.NumMols())

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that actually failed to open.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)

        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # Keep only the conformer the score refers to.
            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 2: Full listing of OEShapeDatabaseOptions_SetUserStarts.py.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oefastrocs

# Configure the options to start from user-supplied coordinates.
opts = oefastrocs.OEShapeDatabaseOptions()
opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_UserInertialStarts)

# Flat list of coordinate values, three per start.
startsCoords = oechem.OEFloatVector()

startsCoords.append(1.45)
startsCoords.append(6.78)
startsCoords.append(-3.21)

# Floor division keeps the number of starts an int; "/" yields a float on
# Python 3.
opts.SetUserStarts(startsCoords, len(startsCoords) // 3)

# Read the starts back into a vector sized for three values per start.
coords = oechem.OEFloatVector(opts.GetNumStarts() * 3)
opts.GetUserStarts(coords)

Listing 3: Full listing of Tutorial1_InertialAtHeavyAtoms_Snippet.py.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

# Put the sibling "../python" directory (resolved relative to this script)
# at the front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

# The query file is read from argv[1] and the database from argv[2].
if len(sys.argv) < 3:
    oechem.OEThrow.Usage("%s <query> <database>" % sys.argv[0])

shapeOnlyDB = oefastrocs.OEShapeDatabase(oefastrocs.OEShapeDatabaseType_Shape)

opts = oefastrocs.OEShapeDatabaseOptions()
opts.SetLimit(5)
opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_InertialAtHeavyAtoms)

qfs = oechem.oemolistream()
if not qfs.open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open '%s'" % sys.argv[1])

query = oechem.OEGraphMol()
if not oechem.OEReadMolecule(qfs, query):
    oechem.OEThrow.Fatal("Unable to read query from '%s'" % sys.argv[1])

if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_InertialAtHeavyAtoms:
    numStarts = opts.GetNumHeavyAtomStarts(query)
    oechem.OEThrow.Info("This example will use %u starts" % numStarts)

# Use the coordinates stored under key 34 of the query's coordinate mapping
# as the single user start.
# NOTE(review): the key is hard-coded; presumably it is an atom index, so a
# query lacking it would fail on this lookup - confirm.
startsCoords = oechem.OEFloatVector()
xyz = query.GetCoords()[34]
for x in xyz:
    startsCoords.append(x)

opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_UserInertialStarts)
opts.SetUserStarts(oechem.OEFloatVector(startsCoords), len(startsCoords) // 3)

if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_UserInertialStarts:
    numStarts = opts.GetNumUserStarts()
    oechem.OEThrow.Info("This example will use %u starts" % numStarts)

oechem.OEThrow.Info("Opening database file %s ..." % sys.argv[2])
dbase = oefastrocs.OEShapeDatabase()
moldb = oechem.OEMolDatabase()

if not moldb.Open(sys.argv[2]):
    oechem.OEThrow.Fatal("Unable to open '%s'" % sys.argv[2])
if not dbase.Open(moldb, oefastrocs.OEFastROCSOrientation_AsIs):
    oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % sys.argv[2])

opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_AsIs)

if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_AsIs:
    numStarts = opts.GetNumStarts()
    numInertialStarts = opts.GetNumInertialStarts()
    oechem.OEThrow.Info("This example will use %u starts & %u inertial starts"
                        % (numStarts, numInertialStarts))

opts.SetMaxOverlays(opts.GetNumInertialStarts() * opts.GetNumUserStarts())

opts.SetLimit(50)
opts.SetMaxConfs(5)

Listing 4: Shape Database Client Histogram.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from __future__ import unicode_literals
import os
import sys
import argparse

try:
    from xmlrpclib import ServerProxy, Binary, Fault
except ImportError:  # python 3
    from xmlrpc.client import ServerProxy, Binary, Fault


class Pyasciigraph:
    """Render ``(info, value)`` pairs as a horizontal ASCII bar chart.

    Copied from https://pypi.python.org/pypi/ascii_graph/0.2.1
    """

    def __init__(self, line_length=79, min_graph_length=50, separator_length=2):
        """Constructor of Pyasciigraph

        :param int line_length: the max number of char on a line
                if any line cannot be shorter,
                it will go over this limit
        :param int min_graph_length: the min number of char used by the graph
        :param int separator_length: the length of field separator
        """
        self.line_length = line_length
        self.separator_length = separator_length
        self.min_graph_length = min_graph_length

    def _u(self, x):
        """Return *x* as a unicode string on both Python 2 and Python 3."""
        # Compare the numeric major version rather than the version string.
        if sys.version_info[0] < 3:
            import codecs
            return codecs.unicode_escape_decode(x)[0]
        return x

    def _get_maximum(self, data):
        """Return the largest value and the widest info/value text in *data*.

        :param iterable data: list of tuple (info, value)
        :rtype: dict with keys ``max_value``, ``info_max_length`` and
                ``value_max_length`` (all 0 for empty data)
        """
        all_max = {
            'value_max_length': 0,
            'info_max_length': 0,
            'max_value': 0,
        }

        for (info, value) in data:
            if value > all_max['max_value']:
                all_max['max_value'] = value

            if len(info) > all_max['info_max_length']:
                all_max['info_max_length'] = len(info)

            if len(str(value)) > all_max['value_max_length']:
                all_max['value_max_length'] = len(str(value))
        return all_max

    def _gen_graph_string(self, value, max_value, graph_length, start_value):
        """Return the bar for *value*, padded with spaces up to *start_value*."""
        number_of_square = 0
        # A zero maximum would divide by zero; the bar then stays empty.
        if max_value:
            number_of_square = int(value * graph_length / max_value)
        number_of_space = int(start_value - number_of_square)
        return '#' * number_of_square + self._u(' ') * number_of_space

    def _gen_info_string(self, info, start_info, line_length):
        """Return *info* padded with spaces to the end of the line."""
        number_of_space = (line_length - start_info - len(info))
        return info + self._u(' ') * number_of_space

    def _gen_value_string(self, value, start_value, start_info):
        """Return *value* right-aligned in its column, followed by a separator."""
        number_space = start_info -\
                start_value -\
                len(str(value)) -\
                self.separator_length

        return ' ' * number_space +\
            str(value) +\
            ' ' * self.separator_length

    def _sanitize_string(self, string):
        """Return *string* as text, converting non-string input with ``str``."""
        # get the type of a unicode string
        unicode_type = type(self._u('t'))
        if isinstance(string, (str, unicode_type)):
            return string
        # Numbers and any other "castable" object are converted here.
        return str(string)

    def _sanitize_data(self, data):
        """Return *data* with the info field of every item sanitized."""
        return [(self._sanitize_string(item[0]), item[1]) for item in data]

    def graph(self, label, data, sort=0, with_value=True):
        """function generating the graph

        :param string label: the label of the graph
        :param iterable data: the data (list of tuple (info, value))
                info must be "castable" to a unicode string
                value must be an int or a float
        :param int sort: flag sorted
                0: not sorted (same order as given) (default)
                1: increasing order
                2: decreasing order
        :param boolean with_value: flag printing value
                True: print the numeric value (default)
                False: don't print the numeric value
                NOTE: this flag is accepted but not consulted by the
                implementation; the value is always printed.
        :rtype: a list of strings (each lines)

        """
        result = []
        san_data = self._sanitize_data(data)
        san_label = self._sanitize_string(label)

        if sort == 1:
            san_data = sorted(san_data, key=lambda value: value[1], reverse=False)
        elif sort == 2:
            san_data = sorted(san_data, key=lambda value: value[1], reverse=True)

        all_max = self._get_maximum(san_data)

        # Measure the sanitized label: the raw label may not be a string.
        real_line_length = max(self.line_length, len(san_label))

        min_line_length = self.min_graph_length + 2 * self.separator_length +\
            all_max['value_max_length'] + all_max['info_max_length']

        if min_line_length < real_line_length:
            # compute where the info column starts
            start_info = self.line_length -\
                all_max['info_max_length']
            # compute where the value column starts
            start_value = start_info -\
                self.separator_length -\
                all_max['value_max_length']
            # compute where the graph ends
            graph_length = start_value -\
                self.separator_length
        else:
            # compute where the value column starts
            start_value = self.min_graph_length +\
                self.separator_length
            # compute where the info column starts
            start_info = start_value +\
                all_max['value_max_length'] +\
                self.separator_length
            # compute where the graph ends
            graph_length = self.min_graph_length
            # compute the real line length
            real_line_length = min_line_length

        result.append(san_label)
        result.append(self._u('#') * real_line_length)

        for item in san_data:
            info = item[0]
            value = item[1]

            graph_string = self._gen_graph_string(
                value,
                all_max['max_value'],
                graph_length,
                start_value
                )

            value_string = self._gen_value_string(
                value,
                start_value,
                start_info
                )

            info_string = self._gen_info_string(
                info,
                start_info,
                real_line_length
                )
            new_line = graph_string + value_string + info_string
            result.append(new_line)

        return result


def AddBin(bins, binSize, binIdx, curTotal):
    """Append one histogram bin to *bins* as a ``(label, total)`` tuple.

    The label is the bin's lower bound, ``binSize * binIdx``, formatted with
    two decimals.
    """
    bins.append(("%.2f" % (binSize * binIdx), curTotal))


def GetGraphTitle(tversky, shapeOnly):
    """Return the histogram title for the given scoring flags.

    Both flags are interpreted by truthiness: *tversky* selects Tversky over
    Tanimoto, *shapeOnly* selects Shape over Combo.
    """
    titles = {
        (False, False): "FastROCS Tanimoto Combo Score Distribution",
        (False, True): "FastROCS Tanimoto Shape Score Distribution",
        (True, False): "FastROCS Tversky Combo Score Distribution",
        (True, True): "FastROCS Tversky Shape Score Distribution",
    }
    return titles[(bool(tversky), bool(shapeOnly))]


def PrintHistogram(hist, tversky=None, shapeOnly=None):
    """Print *hist* as an ASCII bar chart, merging every 10 entries into a bin.

    :param hist: sequence of counts; nothing is printed when it is empty
    :param tversky: truthy to title the chart as Tversky instead of Tanimoto
    :param shapeOnly: truthy for shape-only scores (maximum score 1.0 instead
            of 2.0)
    """
    if not hist:
        # Nothing to plot, and the bin size below would divide by zero.
        return

    squashFactor = 10
    maxScore = 1.0 if shapeOnly else 2.0
    # float() forces true division on Python 2 as well, so histograms shorter
    # than squashFactor do not produce a zero divisor.
    binSize = maxScore / (float(len(hist)) / squashFactor)

    bins = []
    curTotal = 0
    binIdx = 0
    for i, val in enumerate(hist):
        # Every squashFactor entries, flush the running total as one bin.
        if i != 0 and (i % squashFactor) == 0:
            AddBin(bins, binSize, binIdx, curTotal)
            curTotal = 0
            binIdx += 1

        curTotal += val
    # Flush the final (possibly partial) bin.
    AddBin(bins, binSize, binIdx, curTotal)

    graph = Pyasciigraph()

    for line in graph.graph(GetGraphTitle(tversky, shapeOnly), bins):
        print(line)


def GetFormatExtension(fname):
    """Return the lower-cased extension of *fname*, keeping a ``.gz`` suffix.

    For a gzipped name the extension before ``.gz`` is included, so
    ``mols.oeb.gz`` yields ``.oeb.gz``.
    """
    stem, suffix = os.path.splitext(fname.lower())
    if suffix != ".gz":
        return suffix
    inner = os.path.splitext(stem)[1]
    return inner + ".gz"


def main(argv=[__name__]):
    """Submit a query to a shape database server and print score histograms.

    The query file is sent with ``SubmitQuery``; status and histogram are
    polled until the search completes, a histogram is printed for each poll,
    and the results are written to the requested output file.

    :param argv: accepted for interface compatibility; the arguments are
            parsed by argparse from ``sys.argv``
    :return: 0 on success, 1 when the query file cannot be opened or the
            server reports a fault
    :raises SystemExit: with status 1 on invalid or surplus arguments
    """

    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument('server:port', help='Server name and port number \
                        of database to search i.e. localhost:8080.')
    parser.add_argument('query', help='File containing the query molecule to search \
                        (format not restricted to *.oeb).')
    parser.add_argument('results', help='Output file to store results \
                        (format not restricted to *.oeb).')
    parser.add_argument('nHits', nargs='?', type=int, default=100,
                        help='Number of hits to return (default=100).')
    parser.add_argument('--tversky', action='store_true', default=argparse.SUPPRESS,
                        help='Switch to Tversky similarity scoring (default=Tanimoto).')
    parser.add_argument('--shapeOnly', action='store_true', default=argparse.SUPPRESS,
                        help='Switch to shape-only scores (default=Combo).')
    parser.add_argument('--alternativeStarts', default=argparse.SUPPRESS, nargs=1, dest='altStarts',
                        choices=('random', 'subrocs',
                                 'inertialAtHeavyAtoms', 'inertialAtColorAtoms'),
                        help='Optimize using alternative starts. '
                             'To perform N random starts do \
                             "--alternativeStarts random N" (default N=10)')

    # parse_known_args leaves the optional N of "random N" in `remaining`.
    known, remaining = (parser.parse_known_args())
    dargs = vars(known)

    qfname = dargs.pop('query')
    numHits = dargs.pop('nHits')

    startType = dargs.get('altStarts', None)

    if startType:
        dargs['altStarts'] = str(startType[0])
        if len(remaining) == 1 and dargs['altStarts'] == 'random':
            try:
                dargs['randStarts'] = int(remaining[0])
            except ValueError:
                print("Invalid argument given. See --help menu for argument list")
                sys.exit(1)
        elif remaining:
            # Only "random" takes a trailing value; anything else is surplus.
            print("Too many arguments given. See --help menu for argument list")
            sys.exit(1)
    elif remaining:
        print("Too many arguments given. See --help menu for argument list")
        sys.exit(1)

    try:
        fh = open(qfname, 'rb')
    except IOError:
        sys.stderr.write("Unable to open '%s' for reading" % qfname)
        return 1

    # Close the query file as soon as its content has been read.
    with fh:
        data = Binary(fh.read())

    iformat = GetFormatExtension(qfname)

    ofname = dargs.pop('results')
    oformat = GetFormatExtension(ofname)

    s = ServerProxy("http://" + dargs.pop('server:port'))

    # Whatever is left in dargs is passed to the server as search options.
    try:
        idx = s.SubmitQuery(data, numHits, iformat, oformat, dargs)
    except Fault as e:
        sys.stderr.write(str(e))
        return 1

    while True:
        blocking = True
        try:
            current, total = s.QueryStatus(idx, blocking)
            hist = s.QueryHistogram(idx)
        except Fault as e:
            # sys.stderr.write keeps this file importable on Python 2, where
            # print(..., file=...) needs a __future__ import.
            sys.stderr.write(str(e) + "\n")
            return 1

        if total == 0:
            continue

        PrintHistogram(hist, dargs.get('tversky', None), dargs.get('shapeOnly', None))

        if total <= current:
            break

    results = s.QueryResults(idx)

    # assuming the results come back as a string in the requested format
    with open(ofname, 'wb') as output:
        output.write(results.data)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 6: AsIs starts.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import sys
import os

from openeye import oechem
from openeye import oefastrocs

# Put the sibling "../python" directory (resolved relative to this script)
# at the front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with the ``AsIs`` initial orientation.

    The database is opened with ``OEFastROCSOrientation_AsIs`` and the search
    options use the same orientation.  The query molecule is written to the
    output file first, followed by each retrieved hit annotated with its
    scores.

    :param argv: ``[program, database, query, hits]``
    :return: 0 (also after a usage error or when no supported GPU is
            available)
    """
    # Three arguments are required: argv[3] names the output file.
    if len(argv) < 4:
        oechem.OEThrow.Usage("%s <database.oeb> <queries> <hits.oeb>" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]

    if oechem.OEIsGZip(dbname):
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots, oefastrocs.OEFastROCSOrientation_AsIs):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_AsIs)

    opts.SetLimit(50)
    opts.SetMaxConfs(5)
    opts.SetMaxOverlays(opts.GetNumInertialStarts() * opts.GetNumStarts())

    if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_AsIs:
        numStarts = opts.GetNumStarts()
        numInertialStarts = opts.GetNumInertialStarts()
        oechem.OEThrow.Info("This example will use %u starts & %u inertial starts"
                            % (numStarts, numInertialStarts))

    qfname = argv[2]
    # read in query
    qfs = oechem.oemolistream()
    if not qfs.open(qfname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(qfs, query):
        oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

    # write out everything to the requested output file, query first
    ofs = oechem.oemolostream()
    if not ofs.open(argv[3]):
        oechem.OEThrow.Fatal("Unable to open '%s'" % argv[3])
    oechem.OEWriteMolecule(ofs, query)

    print("Searching for %s" % qfname)
    for score in dbase.GetSortedScores(query, opts):
        print("Score for mol %u(conf %u) %f shape %f color" % (
               score.GetMolIdx(), score.GetConfIdx(),
               score.GetShapeTanimoto(), score.GetColorTanimoto()))
        dbmol = oechem.OEMol()
        molidx = score.GetMolIdx()
        if not moldb.GetMolecule(dbmol, molidx):
            print("Unable to retrieve molecule '%u' from the database" % molidx)
            continue

        # Keep only the conformer the score refers to.
        mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
        oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
        oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
        oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
        score.Transform(mol)

        oechem.OEWriteMolecule(ofs, mol)
    print("Wrote results to %s" % argv[3])

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 7: Database searching with queries.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs
from openeye import oeshape

# Put the sibling "../python" directory (resolved relative to this script)
# at the front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with molecule or shape-query files.

    A query file with the ``sq`` extension is read as a shape query and its
    results are written in ``oeb`` format; any other file is read as a
    molecule.  Every scored molecule is written to
    ``<query base>_results.<extension>``.

    :param argv: command line as ``[program, database, query, ...]``
    :return: 0 (also when no supported GPU is available)
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    if oechem.OEIsGZip(dbname):
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    # The hit limit is the number of database molecules, so the options are
    # the same for every query and are built once.
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(moldb.NumMols())

    for qfname in argv[2:]:

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        if ext == 'sq':
            query = oeshape.OEShapeQuery()
            if not oeshape.OEReadShapeQuery(qfname, query):
                oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)
            # Results of a shape query are written as oeb.
            ext = 'oeb'
        else:
            # read in query
            qfs = oechem.oemolistream()
            if not qfs.open(qfname):
                oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

            query = oechem.OEGraphMol()
            if not oechem.OEReadMolecule(qfs, query):
                oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that actually failed to open.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # Keep only the conformer the score refers to.
            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 8: Database searching with every conformer of every query.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys
import argparse

from openeye import oechem
from openeye import oefastrocs

# Put the sibling "../python" directory (resolved relative to this script)
# at the front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with every conformer of every query molecule.

    Args:
        argv: ``sys.argv``-style list; ``argv[0]`` is the program name and the
            remaining items are the command line (``database``, one or more
            ``query`` files, optional ``--nHits``, ``--cutoff``, ``--tversky``).
            When only the program name is given, ``sys.argv`` is parsed
            instead, as argparse does by default.

    Returns:
        0, both after a completed search and when no supported GPU is found.

    For each molecule in each query file the hits of all its conformers are
    written to ``<title>_results.<ext>``; untitled molecules use their index
    within the query file in place of the title.
    """
    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument('database',
                        help='File containing the database molecules to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument('query', default=[], nargs='+',
                        help='File containing the query molecule(s) to be search \
                              (format not restricted to *.oeb).')
    parser.add_argument('--nHits', dest='nHits', type=int, default=100,
                        help='Number of hits to return (default = 100).')
    parser.add_argument('--cutoff',  dest='cutoff', type=float, default=argparse.SUPPRESS,
                        help='Specify a cutoff criteria for scores.')
    parser.add_argument('--tversky', dest='tversky', action='store_true', default=argparse.SUPPRESS,
                        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    # Honour the argv handed to main(); an argv holding only the program name
    # falls back to sys.argv, which is what a bare parse_args() would read.
    args = parser.parse_args(argv[1:] or None)

    dbname = args.database
    if oechem.OEIsGZip(dbname):
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())
    # --cutoff and --tversky default to argparse.SUPPRESS, so the attribute
    # only exists when the option was given on the command line.
    if hasattr(args, 'cutoff'):
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)
    if hasattr(args, 'tversky'):
        # Pass the similarity-function constant rather than the parsed bool.
        opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:

        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        # Read one molecule only to validate the file, then start over so the
        # loop below sees every molecule, including the first.
        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):

            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if len(moltitle) == 0:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

            print("Searching for %s of %s (%s conformers)" % (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():

                for score in dbase.GetSortedScores(conf, opts):

                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print("Unable to retrieve molecule '%u' from the database" % dbmolidx)
                        continue

                    # keep only the database conformer that produced this score
                    mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    # NOTE(review): the Tanimoto getters are written even when
                    # --tversky is selected; confirm whether the Tversky
                    # getters should be used in that case.
                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
                    oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
                    oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)

                qconfidx += 1

            ofs.close()
            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)

            # advance per molecule so untitled molecules from the same file
            # do not all map to the same output file name
            qmolidx += 1
    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 9: Database searching with queries using Tversky similarity scoring.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with each query using Tversky similarity scoring.

    Args:
        argv: ``sys.argv``-style list: program name, database file, then one
            or more query files.

    Returns:
        0 once every query has been searched.

    Only the first molecule of each query file is used.  The hit limit is set
    to the number of database molecules, and every hit is written, with its
    Tversky scores as SD data, to ``<query base name>_results.<ext>``.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    dbname = argv[1]
    if oechem.OEIsGZip(dbname):
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%s seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
    numHits = moldb.NumMols()
    opts.SetLimit(numHits)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        # strip ".<ext>" from the query name; leave the name alone when the
        # file has no extension
        base = qfname[:-(len(ext) + 1)] if ext else qfname

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue
            # keep only the database conformer that produced this score
            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTversky", "%.4f" % score.GetShapeTversky())
            oechem.OESetSDData(mol, "ColorTversky", "%.4f" % score.GetColorTversky())
            oechem.OESetSDData(mol, "TverskyCombo", "%.4f" % score.GetTverskyCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)
    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 10: Optimize over color overlap in addition to shape overlap.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with color optimization switched on.

    Args:
        argv: ``sys.argv``-style list: program name, database file, then one
            or more query files.

    Returns:
        0, both after a completed search and when no supported GPU is found.

    Only the first molecule of each query file is used.  The hit limit is set
    to the number of database molecules, and every hit is written, with its
    Tanimoto scores as SD data, to ``<query base name>_results.<ext>``.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetColorOptimization(True)
    # the limit does not depend on the query, so set it once up front
    opts.SetLimit(moldb.NumMols())

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        # strip ".<ext>" from the query name; leave the name alone when the
        # file has no extension
        base = qfname[:-(len(ext) + 1)] if ext else qfname

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)

        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # keep only the database conformer that produced this score
            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 11: Cache custom color atoms onto molecules.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# Cache custom color atoms onto a molecule to be used by FastROCS

import os
import sys

from openeye import oechem
from openeye import oeshape
from openeye import oefastrocs

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

# Text of the custom color force field that main() feeds to
# OEColorForceField.Init(): it declares one color type ("negative"), three
# PATTERN lines assigned to that type, and a single negative/negative
# INTERACTION line.
COLOR_FORCE_FIELD = """#
TYPE negative
#
#
PATTERN negative [-]
PATTERN negative [OD1+0]-[!#7D3]~[OD1+0]
PATTERN negative [OD1+0]-[!#7D4](~[OD1+0])~[OD1+0]
#
#
INTERACTION negative negative attractive gaussian weight=1.0 radius=1.0
"""


def main(argv=[__name__]):
    """Cache custom color atoms onto every molecule of a file and index the result.

    Args:
        argv: ``sys.argv``-style list: program name, input file, output file.
            The output file must have an OEB extension.

    Returns:
        0 after the output file has been written and indexed.

    Each input molecule is passed through ``OEPrepareFastROCSMol`` with the
    force field built from ``COLOR_FORCE_FIELD`` and written to the output,
    after which a molecule-database index is created for the output file.
    """
    if len(argv) != 3:
        oechem.OEThrow.Usage("%s <input> <output>" % argv[0])

    ifname = argv[1]
    ofname = argv[2]

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    ihand = ifs.GetBinaryIOHandler()
    ihand.Clear()
    oechem.OEInitHandler(ihand, oechem.OEBRotCompressOpts(), oechem.OEBRotCompressOpts())

    if not ifs.open(ifname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifname)

    # output
    oformt = oechem.OEGetFileType(oechem.OEGetFileExtension(ofname))
    if oformt != oechem.OEFormat_OEB:
        oechem.OEThrow.Fatal("Output file format must be OEB")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % ofname)

    # build the color force field from the in-file definition
    iss = oechem.oeisstream(COLOR_FORCE_FIELD)
    cff = oeshape.OEColorForceField()
    if not cff.Init(iss):
        oechem.OEThrow.Fatal("Unable to initialize OEColorForceField")

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        oefastrocs.OEPrepareFastROCSMol(mol, cff)

        oechem.OEWriteMolecule(ofs, mol)

        dots.Update()

    dots.Total()
    # close the output before indexing the file
    ofs.close()

    print("Indexing %s" % ofname)
    if not oechem.OECreateMolDatabaseIdx(ofname):
        oechem.OEThrow.Fatal("Failed to index %s" % ofname)

    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 12: Run FastROCS with the implicit Mills-Dean color force field, sans rings.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs
from openeye import oeshape

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database built on the ImplicitMillsDeanNoRings color force field.

    Args:
        argv: ``sys.argv``-style list: program name, database file, then one
            or more query files.

    Returns:
        0, both after a completed search and when no supported GPU is found.

    Only the first molecule of each query file is used.  The hit limit is set
    to the number of database molecules, and every hit is written, with its
    Tanimoto scores as SD data, to ``<query base name>_results.<ext>``.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    cff = oeshape.OEColorForceField()
    # do not build the database on a force field that failed to initialize
    if not cff.Init(oeshape.OEColorFFType_ImplicitMillsDeanNoRings):
        oechem.OEThrow.Fatal("Unable to initialize OEColorForceField")
    dbase = oefastrocs.OEShapeDatabase(cff)
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    # the limit does not depend on the query, so set it once up front
    opts.SetLimit(moldb.NumMols())

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        # strip ".<ext>" from the query name; leave the name alone when the
        # file has no extension
        base = qfname[:-(len(ext) + 1)] if ext else qfname

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)

        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # keep only the database conformer that produced this score
            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 13: Database searching with queries using the inertial at heavy atoms starting orientation.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database using the inertial-at-heavy-atoms starting orientation.

    Args:
        argv: ``sys.argv``-style list: program name, database file, query
            file, output (hits) file.

    Returns:
        0, after a completed search, after printing usage, and when no
        supported GPU is found.

    Only the first molecule of the query file is used.  The query itself is
    written to the output file first, followed by up to five hits carrying
    their Tanimoto scores as SD data.
    """
    if len(argv) < 4:
        oechem.OEThrow.Usage("%s <database.oeb> <queries> <hits.oeb>" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    if oechem.OEIsGZip(dbname):
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()

    opts.SetLimit(5)
    opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_InertialAtHeavyAtoms)

    qfname = argv[2]
    # read in query
    qfs = oechem.oemolistream()
    if not qfs.open(qfname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(qfs, query):
        oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

    # write the query, then the hits, to the output file named on the command line
    ofname = argv[3]
    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
    oechem.OEWriteMolecule(ofs, query)

    numStarts = opts.GetNumHeavyAtomStarts(query)
    if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_InertialAtHeavyAtoms:
        print("This example will use %u starts" % numStarts)

    # allow one overlay per (inertial start, heavy-atom start) combination
    opts.SetMaxOverlays(opts.GetNumInertialStarts() * numStarts)

    print("Searching for %s" % qfname)
    for score in dbase.GetSortedScores(query, opts):
        molidx = score.GetMolIdx()
        print("Score for mol %u(conf %u) %f shape %f color" % (
               molidx, score.GetConfIdx(),
               score.GetShapeTanimoto(), score.GetColorTanimoto()))
        dbmol = oechem.OEMol()
        if not moldb.GetMolecule(dbmol, molidx):
            print("Unable to retrieve molecule '%u' from the database" % molidx)
            continue

        # keep only the database conformer that produced this score
        mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
        oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
        oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
        oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
        score.Transform(mol)

        oechem.OEWriteMolecule(ofs, mol)
    ofs.close()
    print("Wrote results to %s" % ofname)

    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 14: Turn on ROCS mode.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with the FastROCS mode set to ROCS.

    Args:
        argv: ``sys.argv``-style list: program name, database file, then one
            or more query files.

    Returns:
        0, both after a completed search and when no supported GPU is found.

    Only the first molecule of each query file is used.  The hit limit is set
    to the number of database molecules, and every hit is written, with its
    Tanimoto scores as SD data, to ``<query base name>_results.<ext>``.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetFastROCSMode(oefastrocs.OEFastROCSMode_ROCS)
    # the limit does not depend on the query, so set it once up front
    opts.SetLimit(moldb.NumMols())

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        # strip ".<ext>" from the query name; leave the name alone when the
        # file has no extension
        base = qfname[:-(len(ext) + 1)] if ext else qfname

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)

        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # keep only the database conformer that produced this score
            mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0


# Script entry point: run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 15: Cluster database into shape clusters.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# Shape clustering
import sys
import os

from openeye import oechem
from openeye import oeshape
from openeye import oefastrocs

# Put the "../python" directory (resolved relative to this script) at the
# front of the module search path.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def GetScoreGetter(shapeOnly=False):
    """Pick the OEShapeDatabaseScore accessor used to read a hit's score.

    Returns the class-level ``GetShapeTanimoto`` function when ``shapeOnly``
    is truthy and ``GetTanimotoCombo`` otherwise; call the result with a
    score object.
    """
    score_cls = oefastrocs.OEShapeDatabaseScore
    return score_cls.GetShapeTanimoto if shapeOnly else score_cls.GetTanimotoCombo


class ShapeCluster:
    """Cluster the molecules of a file around successive cluster heads.

    The first head is the molecule with the median self-shape volume.  Each
    ``GetCluster`` pass scores the head against the shape database, hands out
    every still-unclustered molecule scoring above ``cutoff``, and picks as
    the next head the remaining molecule whose highest score against the
    heads seen so far is lowest.
    """

    def __init__(self, dbname, cutoff, shapeOnly):
        """Load ``dbname`` into a shape database and choose the first head.

        Args:
            dbname: molecule file to cluster.
            cutoff: score a molecule must exceed to join a cluster.
            shapeOnly: when true, build and score the database in shape-only
                mode and compare shape Tanimoto instead of Tanimoto combo.
        """
        self.cutoff = cutoff

        # set up options and database based upon shapeOnly
        self.defaultOptions = oefastrocs.OEShapeDatabaseOptions()
        dbtype = oefastrocs.OEShapeDatabaseType_Default
        if shapeOnly:
            dbtype = oefastrocs.OEShapeDatabaseType_Shape

        self.defaultOptions.SetScoreType(dbtype)
        self.shapedb = oefastrocs.OEShapeDatabase(dbtype)
        # dbmols[i] holds the compressed molecule i, or None once clustered
        self.dbmols = []
        volumes = []

        # read in database
        ifs = oechem.oemolistream()
        if not ifs.open(dbname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

        count = 0
        for mol in ifs.GetOEGraphMols():
            # give untitled molecules a generated title
            title = mol.GetTitle()
            if not title:
                title = "Untitled" + str(count)
                mol.SetTitle(title)
                count += 1

            # NOTE(review): the rest of the class indexes self.dbmols with
            # this idx, so it assumes AddMol hands out indices in append
            # order starting at 0 -- confirm.
            idx = self.shapedb.AddMol(oechem.OEMol(mol))

            volume = oeshape.OEGetCachedSelfShape(mol)
            if volume == 0.0:
                volume = oeshape.OESelfShape(mol)
            volumes.append((volume, idx))

            dbmol = oechem.OEGraphMol(mol, oechem.OEMolBaseType_OEDBMol)
            dbmol.Compress()
            self.dbmols.append(dbmol)

        numMols = len(volumes)
        if numMols == 0:
            # without this the median lookup below fails with an IndexError
            oechem.OEThrow.Fatal("No molecules found in '%s'" % dbname)

        # find the molecule with the median volume as our first query
        volumes.sort()
        medianVolume, medianIdx = volumes[numMols // 2]

        self.nextClusterHeadIdx = medianIdx
        self.remainingMolecules = numMols

        # highest score seen so far between each molecule and any head
        self.tanimotos = [0.0] * numMols

        self.scoreGetter = GetScoreGetter(shapeOnly)

    def HasRemainingMolecules(self):
        """Return True while at least one molecule has not been clustered."""
        return self.remainingMolecules != 0

    def _removeMolecule(self, idx):
        """Mark molecule ``idx`` as clustered and return it uncompressed."""
        self.remainingMolecules -= 1

        assert self.dbmols[idx] is not None
        dbmol = self.dbmols[idx]
        dbmol.UnCompress()
        self.dbmols[idx] = None

        assert self.tanimotos[idx] is not None
        # the largest float keeps a removed molecule from winning the
        # "lowest score" selection of the next head
        self.tanimotos[idx] = sys.float_info.max

        return dbmol

    def GetNextClusterHead(self):
        """Remove and return the molecule chosen as the next cluster head."""
        assert self.nextClusterHeadIdx is not None
        return self._removeMolecule(self.nextClusterHeadIdx)

    def GetCluster(self, query):
        """Yield ``(molecule, score)`` for every neighbour of ``query``.

        A neighbour is a not-yet-clustered molecule whose score against
        ``query`` exceeds the cutoff; it is removed from the pool as it is
        yielded.  The next cluster head is only recorded once the generator
        has been run to completion.
        """
        options = oefastrocs.OEShapeDatabaseOptions(self.defaultOptions)

        dots = oechem.OEDots(10000, 200, "molecules searched")

        minTani = sys.float_info.max
        minIdx = None
        for score in self.shapedb.GetScores(query, options):
            idx = score.GetMolIdx()
            # check if already in a cluster
            if self.dbmols[idx] is None:
                continue

            similarity = self.scoreGetter(score)
            if self.cutoff < similarity:
                yield self._removeMolecule(idx), score
            else:
                self.tanimotos[idx] = max(self.tanimotos[idx], similarity)

                # track the remaining molecule least similar to every head so far
                minTani, minIdx = min((minTani, minIdx), (self.tanimotos[idx], idx))
            dots.Update()
        dots.Total()

        self.nextClusterHeadIdx = minIdx


# Command-line interface definition handed to oechem.OEInterface in main():
# two required keyless strings (-dbase, -clusters), a -shapeOnly flag and the
# -cutoff score threshold.
InterfaceData = """\
!BRIEF [-shapeOnly] [-cutoff 0.75] [-dbase] <database> [-clusters] <clusters.oeb>
!PARAMETER -dbase
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to select from
  !KEYLESS 1
!END
!PARAMETER -clusters
  !TYPE string
  !REQUIRED true
  !BRIEF Output to write clusters to
  !KEYLESS 2
!END
!PARAMETER -shapeOnly
  !ALIAS -s
  !TYPE bool
  !DEFAULT false
  !BRIEF Run FastROCS in shape only mode.
!END
!PARAMETER -cutoff
  !ALIAS -c
  !TYPE float
  !DEFAULT 0.75
  !BRIEF Score a molecule must exceed to be added to a cluster.
!END
"""


def main(argv=[__name__]):
    """ Cluster a shape database and write one OEB record per cluster.

    argv - command line, parsed with the InterfaceData definition.

    Each cluster head is written to the -clusters file with its neighbors
    attached as generic data keyed by the neighbor's title. Every neighbor
    carries its score relative to the head as SD data and is transformed
    into the overlay orientation.

    Returns 0 on success; fatal conditions are reported through
    oechem.OEThrow.Fatal.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    dbname = itf.GetString("-dbase")
    if oechem.OEIsGZip(dbname):
        # format the name into the message, as every other Fatal call in
        # this file does, instead of passing it as a second argument
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    cutoff = itf.GetFloat("-cutoff")
    shapeOnly = itf.GetBool("-shapeOnly")
    clustersName = itf.GetString("-clusters")

    ofs = oechem.oemolostream()
    if not ofs.open(clustersName):
        oechem.OEThrow.Fatal("Unable to open '%s'" % clustersName)

    # neighbors are attached to the head as generic data, so OEB is required
    if ofs.GetFormat() != oechem.OEFormat_OEB:
        oechem.OEThrow.Fatal("Output file must be OEB")

    sdtag = "TanimotoComboFromHead"
    if shapeOnly:
        sdtag = "ShapeTanimotoFromHead"
    getter = GetScoreGetter(shapeOnly)

    cluster = ShapeCluster(dbname, cutoff, shapeOnly)

    # do the clustering
    while cluster.HasRemainingMolecules():
        clusterHead = cluster.GetNextClusterHead()
        print("Searching for neighbors of %s" % clusterHead.GetTitle())

        for nbrMol, score in cluster.GetCluster(clusterHead):
            oechem.OESetSDData(nbrMol, sdtag, "%.4f" % getter(score))

            score.Transform(nbrMol)

            clusterHead.AddData(nbrMol.GetTitle(), nbrMol)

        oechem.OEWriteMolecule(ofs, clusterHead)

    # flush and release the output file explicitly
    ofs.close()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 16: Split database into chunks.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# Split a multi-conformer database into N chunks, keeping molecules
# with the same number of heavy atoms in the same chunk. Also caches other
# useful information onto the molecule to improve database load time.

import sys
import os

from openeye import oechem
from openeye import oefastrocs

# Put the "../python" directory (relative to this script) at the front of
# the module search path. This runs after the openeye imports above, so it
# only affects imports performed later.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """ Split a molecule database into per-server chunk files and index them.

    argv - [program, database, prefix, n_servers]

    Output files are named "<base>_<i>.<ext>" for i in 1..n_servers. The
    extension comes from the prefix when it has one, otherwise from the
    input database name. Each molecule is prepared with
    OEPrepareFastROCSMol and written to the chunk selected by its heavy
    atom count modulo n_servers. Finally a molecule database index is
    created for every chunk.

    Returns 0 on success; problems are reported through oechem.OEThrow.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <database> <prefix> <n_servers>" % argv[0])

    # validate the chunk count before touching any file: zero would make the
    # modulo below fail and a negative count would create no outputs at all
    try:
        nservers = int(argv[3])
    except ValueError:
        nservers = 0
    if nservers < 1:
        oechem.OEThrow.Fatal("<n_servers> must be a positive integer, got '%s'" % argv[3])

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    oechem.OEPreserveRotCompress(ifs)

    ifname = argv[1]
    if not ifs.open(ifname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifname)

    # output
    prefix = argv[2]
    ext = oechem.OEGetFileExtension(prefix)
    extstrt = len(prefix)
    if ext:
        # strip ".<ext>" from the prefix to get the base name
        extstrt = -(len(ext) + 1)
    else:
        ext = oechem.OEGetFileExtension(ifname)
    base = prefix[:extstrt]
    fmt = base + "_%i." + ext

    outstrms = []
    for i in range(1, nservers + 1):
        oname = fmt % i
        ofs = oechem.oemolostream()
        if not ofs.open(oname):
            # report the file that actually failed, not just the prefix
            oechem.OEThrow.Fatal("Unable to open %s for writing" % oname)

        outstrms.append(ofs)

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        oefastrocs.OEPrepareFastROCSMol(mol)

        nhvyatoms = oechem.OECount(mol, oechem.OEIsHeavy())

        ofs = outstrms[nhvyatoms % nservers]
        oechem.OEWriteMolecule(ofs, mol)

        dots.Update()

    dots.Total()

    for strm in outstrms:
        # grab the name first, then close so the file is complete before indexing
        fname = strm.GetFileName()
        strm.close()
        oechem.OEThrow.Info("Indexing %s" % fname)
        if not oechem.OECreateMolDatabaseIdx(fname):
            oechem.OEThrow.Fatal("Failed to index %s" % fname)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 17: Send query to specified server.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys
import argparse

try:
    from xmlrpclib import ServerProxy, Binary, Fault
except ImportError:  # python 3
    from xmlrpc.client import ServerProxy, Binary, Fault


def GetFormatExtension(fname):
    """ Return the lower-cased extension of fname, e.g. '.oeb'.

    A trailing '.gz' is kept together with the extension in front of it,
    so 'mols.SDF.gz' gives '.sdf.gz'. A name without an extension gives ''.
    """
    stem, suffix = os.path.splitext(fname.lower())
    if suffix != ".gz":
        return suffix
    return os.path.splitext(stem)[1] + ".gz"


def main(argv=[__name__]):
    """ Send a query file to a FastROCS XML-RPC server and save the hits.

    argv - full command line; argv[0] is the program name and the
           remaining entries are parsed with argparse (see --help).

    The query file is submitted to the server, progress is printed as
    "current/total" lines while the search runs, and the result data is
    written to the requested output file.

    Returns 0 on success and 1 when the query file cannot be read or the
    server reports a fault. Invalid command lines end the process through
    SystemExit with a non-zero status.
    """

    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument('server:port', help='Server name and port number of database to search '
                                            'i.e. localhost:8080.')
    parser.add_argument('query', help='File containing the query molecule to search '
                                      '(format not restricted to *.oeb).')
    parser.add_argument('results',
                        help='Output file to store results (format not restricted to *.oeb).')
    parser.add_argument('nHits',  nargs='?', type=int, default=100,
                        help='Number of hits to return (default=100).')
    parser.add_argument('--tversky', action='store_true', default=argparse.SUPPRESS,
                        help='Switch to Tversky similarity scoring (default=Tanimoto).')
    parser.add_argument('--shapeOnly', action='store_true', default=argparse.SUPPRESS,
                        help='Switch to shape-only scores (default=Combo).')
    parser.add_argument('--alternativeStarts', default=argparse.SUPPRESS, nargs=1, dest='altStarts',
                        choices=('random', 'subrocs',
                                 'inertialAtHeavyAtoms', 'inertialAtColorAtoms'),
                        help='Optimize using alternative starts (default=inertial). '
                             'To perform N random starts do '
                             '"--alternativeStarts random N" (default N=10)')

    # parse the argv handed to main() instead of implicitly reading sys.argv
    known, remaining = parser.parse_known_args(argv[1:])
    # options left at their SUPPRESS default are absent from dargs, so only
    # switches the user actually gave are forwarded to the server
    dargs = vars(known)

    qfname = dargs.pop('query')
    numHits = dargs.pop('nHits')

    startType = dargs.get('altStarts', None)

    if startType:
        dargs['altStarts'] = str(startType[0])
        # "--alternativeStarts random N": N arrives as the one unparsed argument
        if len(remaining) == 1 and dargs['altStarts'] == 'random':
            try:
                numRands = int(remaining[0])
                dargs['randStarts'] = numRands
            except ValueError:
                print("Invalid argument given. See --help menu for argument list")
                sys.exit(1)
        if len(remaining) > 1:
            print("Too many arguments given. See --help menu for argument list")
            sys.exit(1)
    else:
        if remaining:
            print("Too many arguments given. See --help menu for argument list")
            sys.exit(1)

    # read the whole query up front so the file handle is always closed
    try:
        with open(qfname, 'rb') as fh:
            querybytes = fh.read()
    except IOError:
        sys.stderr.write("Unable to open '%s' for reading\n" % qfname)
        return 1

    iformat = GetFormatExtension(qfname)

    ofname = dargs.pop('results')
    oformat = GetFormatExtension(ofname)

    s = ServerProxy("http://" + dargs.pop('server:port'))
    data = Binary(querybytes)

    try:
        idx = s.SubmitQuery(data, numHits, iformat, oformat, dargs)
    except Fault as e:
        if "TypeError" in e.faultString:
            # we're trying to run against an older server, may be able
            # to still work if the formats are amenable.
            if ((iformat == ".oeb" or iformat == ".sq") and oformat == ".oeb"):
                idx = s.SubmitQuery(data, numHits)
            else:
                sys.stderr.write("%s is too new of a version to work with the server %s\n"
                                 % (argv[0], argv[1]))
                sys.stderr.write("Please upgrade your server to FastROCS version 1.4.0"
                                 " or later to be able to use this client\n")
                sys.stderr.write("This client will work with this version of the server "
                                 "if the input file is either "
                                 "'.oeb' or '.sq' and the output file is '.oeb'\n")
                return 1
        else:
            sys.stderr.write(str(e))
            return 1

    # print the column header once, before the first progress line
    first = True
    while True:
        blocking = True
        try:
            current, total = s.QueryStatus(idx, blocking)
        except Fault as e:
            print(str(e), file=sys.stderr)
            return 1

        # a total of zero is treated as "not started yet": poll again
        if total == 0:
            continue

        if first:
            print("%s/%s" % ("current", "total"))
            first = False
        print("%i/%i" % (current, total))

        if total <= current:
            break

    results = s.QueryResults(idx)

    # assuming the results come back as a string in the requested format
    with open(ofname, 'wb') as output:
        output.write(results.data)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 18: Run the FastROCS server.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import sys
import os
import socket

try:
    from SocketServer import ThreadingMixIn
except ImportError:
    from socketserver import ThreadingMixIn
from threading import Thread
from threading import Lock
from threading import Condition
from threading import Event

from openeye import oechem
from openeye import oeshape

try:
    from openeye import oefastrocs
except ImportError:
    oechem.OEThrow.Fatal("This script is not available, "
                         "FastROCS is not supported on this platform.")

try:
    from xmlrpclib import Binary
    from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
except ImportError:  # python 3
    from xmlrpc.client import Binary
    from xmlrpc.server import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler

# Put the "../python" directory (relative to this script) at the front of
# the module search path. This runs after the openeye imports above, so it
# only affects imports performed later.
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

# very important that OEChem is in this mode since we are passing molecules between threads
oechem.OESetMemPoolMode(oechem.OEMemPoolMode_System)


class ReadWriteLock(object):
    """ Basic locking primitive that allows multiple readers but only
    a single writer at a time. Useful for synchronizing database
    updates. Priority is given to pending writers.

    A writer keeps the condition's underlying lock from AcquireWriteLock
    until ReleaseWriteLock, so those two calls must be made from the same
    thread and always be paired.
    """
    def __init__(self):
        self.cond = Condition()
        self.readers = 0  # number of read locks currently held
        self.writers = 0  # writers that hold or are waiting for the lock

    def AcquireReadLock(self):
        """ Block until no writer holds or awaits the lock, then register a reader. """
        with self.cond:
            try:
                while self.writers:
                    self.cond.wait()

                self.readers += 1
                assert self.writers == 0
            finally:
                self.cond.notify_all()

    def ReleaseReadLock(self):
        """ Unregister a reader and wake up any waiting threads.

        Raises AssertionError if no read lock is held. The check runs
        inside the guarded section so a failure can no longer leave the
        condition locked.
        """
        with self.cond:
            try:
                assert self.readers > 0
                self.readers -= 1
            finally:
                self.cond.notify_all()

    def AcquireWriteLock(self):
        """ Announce a writer, wait for active readers to finish and keep
        the condition locked until ReleaseWriteLock is called. """
        self.cond.acquire()
        # incremented before waiting so new readers queue up behind this writer
        self.writers += 1
        while self.readers:
            self.cond.wait()

        assert self.readers == 0
        assert self.writers > 0

    def ReleaseWriteLock(self):
        """ Drop the write lock taken by AcquireWriteLock and wake up waiters. """
        assert self.readers == 0
        assert self.writers > 0

        self.writers -= 1
        self.cond.notify_all()
        self.cond.release()


class ShapeQueryThread(Thread):
    """ Worker thread that runs a single query against a shape database. """

    def __init__(self, shapedb, querymolstr, nhits, iformat, oformat, errorLevel, **kwargs):
        """ Prepare a query; nothing is executed until start() is called.

        shapedb     - database to run the query against
        querymolstr - query data, see MCMolShapeDatabase.GetBestOverlays
        nhits       - limit on the number of hits
        iformat     - extension describing the format of querymolstr
        oformat     - extension describing the requested result format
        errorLevel  - OEThrow level applied inside the worker thread
        kwargs      - optional 'shapeOnly', 'tversky', 'altStarts' and
                      'randStarts' settings; other keys are ignored
        """
        Thread.__init__(self)

        self.shapeOnly = kwargs.pop('shapeOnly', False)
        self.tversky = kwargs.pop('tversky', False)
        self.altStarts = kwargs.pop('altStarts', False)
        self.randStarts = kwargs.pop('randStarts', False)

        self.shapedb = shapedb
        self.querymolstr = querymolstr
        self.iformat = iformat
        self.oformat = oformat
        self.scoretype = GetDatabaseType(self.shapeOnly)
        self.simFuncType = GetSimFuncType(self.tversky)

        # shape-only searches use 100 histogram bins, all others 200
        self.tracer = oefastrocs.OEDBTracer(100 if self.shapeOnly else 200)

        opts = oefastrocs.OEShapeDatabaseOptions()
        opts.SetTracer(self.tracer)
        opts.SetLimit(nhits)
        opts.SetScoreType(self.scoretype)
        opts.SetSimFunc(self.simFuncType)
        self.options = opts

        if self.altStarts:
            opts.SetInitialOrientation(GetStartType(self.altStarts))
            if self.randStarts:
                opts.SetNumRandomStarts(self.randStarts)

        self.lock = Lock()
        self.errorLevel = errorLevel

    def run(self):
        """ Execute the query and store its results or the exception raised. """
        # make sure the error level is set for this operating system thread
        oechem.OEThrow.SetLevel(self.errorLevel)
        try:
            found = self.shapedb.GetBestOverlays(self.querymolstr,
                                                 self.options,
                                                 self.iformat,
                                                 self.oformat)

            # attributes are written under the lock because GetStatus and
            # GetHistogram may be reading them from another thread
            with self.lock:
                self.results = found
                if not found:
                    self.exception = RuntimeError("Query error, no results to return, "
                                                  "check the server log for more information")

        except Exception as err:
            with self.lock:
                self.exception = err

    def GetStatus(self, blocking):
        """ Return (count, total): the number of conformers searched so
        far and the number that will be searched in total.

        With blocking set to True the call only returns once the count
        has changed (beware of deadlocks!); otherwise it returns
        immediately. An exception stored by run() is re-raised.
        """
        with self.lock:
            if hasattr(self, "exception"):
                raise self.exception

            return self.tracer.GetCounts(blocking), self.tracer.GetTotal()

    def GetHistogram(self):
        """ Return the histogram of the scores computed so far as a list
        of integers. An exception stored by run() is re-raised.
        """
        with self.lock:
            if hasattr(self, "exception"):
                raise self.exception

            histogram = self.tracer.GetHistogram()
            kind = self.scoretype

        bins = oechem.OEUIntVector()
        histogram.GetHistogram(bins, kind)
        return list(bins)

    def GetResults(self):
        """ Return the data produced by the query (the overlaid
        conformers in the requested output format). Only call this after
        the thread has been joined. An exception stored by run() is
        re-raised. """
        if not hasattr(self, "exception"):
            return self.results

        raise self.exception


class ShapeQueryThreadPool:
    """
    Keeps track of the query threads running against one MCMolShapeDatabase.
    """
    def __init__(self, dbase):
        """ Create an empty pool whose queries are issued to dbase. """
        self.shapedb = dbase
        self.queryidx = 0   # index handed to the next submitted query
        self.threads = {}   # query index -> ShapeQueryThread
        self.lock = Lock()  # guards queryidx and threads
        self.errorLevel = oechem.OEThrow.GetLevel()

    def SubmitQuery(self, querymolstr, nhits, iformat, oformat, kwargs):
        """ Start a query thread and return its index, which can be
        passed to QueryStatus, QueryHistogram and QueryResults.

        See MCMolShapeDatabase.GetBestOverlays for a description of
        the querymolstr and nhits arguments.
        """
        with self.lock:
            idx = self.queryidx
            self.queryidx += 1
            self.threads[idx] = ShapeQueryThread(self.shapedb,
                                                 querymolstr,
                                                 nhits,
                                                 iformat,
                                                 oformat,
                                                 self.errorLevel,
                                                 **kwargs)
            self.threads[idx].start()

        return idx

    def QueryStatus(self, idx, blocking):
        """ Return the status of query idx. See
        ShapeQueryThread.GetStatus for the meaning of blocking. """
        with self.lock:
            worker = self.threads[idx]

        # called outside the pool lock so a blocking status call cannot
        # hold up other pool operations
        return worker.GetStatus(blocking)

    def QueryHistogram(self, idx):
        """ Return the histogram of the scores computed so far for
        query idx. """
        with self.lock:
            worker = self.threads[idx]

        return worker.GetHistogram()

    def QueryResults(self, idx):
        """ Forget query idx, wait for its thread to finish and return
        what ShapeQueryThread.GetResults gives. """
        with self.lock:
            worker = self.threads.pop(idx)

        worker.join()
        return worker.GetResults()

    def SetLevel(self, level):
        """ Set what level of information should be printed by the server. """
        self.errorLevel = level
        return True


class DatabaseLoaderThread(Thread):
    """ Thread that reads a database into memory.

    Special note: OEChem must be put into system allocation mode with
    oechem.OESetMemPoolMode(oechem.OEMemPoolMode_System). The default
    OEChem memory caching scheme uses thread local storage, and since
    this thread only lives while the molecules are read, that memory
    would be deallocated when the thread terminates."""
    def __init__(self, shapedb, moldb, dbname, loadedEvent):
        """
        shapedb - the shapedb to add the molecules to
        moldb   - the OEMolDatabase object to use
        dbname  - the file name to open the OEMolDatabase on
        loadedEvent - event to set once loading is finished
        """
        Thread.__init__(self)
        self.shapedb, self.moldb = shapedb, moldb
        self.dbname, self.loadedEvent = dbname, loadedEvent

    def run(self):
        """ Open the database file, load it into the OEShapeDatabase
        and set the loaded event. """
        stopwatch = oechem.OEWallTimer()
        sys.stderr.write("Opening database file %s ...\n" % self.dbname)
        opened = self.moldb.Open(self.dbname)
        if not opened:
            oechem.OEThrow.Fatal("Unable to open '%s'" % self.dbname)

        progress = oechem.OEThreadedDots(10000, 200, "conformers")
        initialized = self.shapedb.Open(self.moldb, progress)
        if not initialized:
            oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % self.dbname)

        progress.Total()
        sys.stderr.write("%s seconds to load database\n" % stopwatch.Elapsed())
        # signal waiters that the database is ready
        self.loadedEvent.set()


def SetupStream(strm, format):
    """ Configure strm for the file format named by the extension format.

    Leading and trailing dots are stripped from format before it is
    looked up. The stream's format and gzip flag are set and the same
    stream object is returned.

    Raises ValueError when the extension maps to no known file type.
    """
    ext = format.strip('.')
    filetype = oechem.OEGetFileType(ext)
    if filetype != oechem.OEFormat_UNDEFINED:
        strm.SetFormat(filetype)
        strm.Setgz(oechem.OEIsGZip(ext))
        return strm
    raise ValueError("Unsupported file format sent to server '%s'" % ext)


# Built-in color force fields selectable by name; GetShapeDatabaseArgs looks
# up the -chemff value here whenever it does not end in ".cff".
OECOLOR_FORCEFIELDS = {
    "ImplicitMillsDean": oeshape.OEColorFFType_ImplicitMillsDean,
    "ImplicitMillsDeanNoRings": oeshape.OEColorFFType_ImplicitMillsDeanNoRings,
    "ExplicitMillsDean": oeshape.OEColorFFType_ExplicitMillsDean,
    "ExplicitMillsDeanNoRings": oeshape.OEColorFFType_ExplicitMillsDeanNoRings
    }


def GetDatabaseType(shapeOnly):
    """ Return the shape-only database type when shapeOnly is true,
    otherwise the default database type. """
    return (oefastrocs.OEShapeDatabaseType_Shape if shapeOnly
            else oefastrocs.OEShapeDatabaseType_Default)


def GetSimFuncType(simFuncType):
    """ Return the Tversky similarity function type when simFuncType is
    true, otherwise the Tanimoto one. """
    return (oefastrocs.OEShapeSimFuncType_Tversky if simFuncType
            else oefastrocs.OEShapeSimFuncType_Tanimoto)


def GetStartType(altStarts):
    """ Translate a starting-orientation name into the matching
    OEFastROCSOrientation constant. Unrecognized values give the
    inertial orientation. """
    if altStarts == 'random':
        orientation = oefastrocs.OEFastROCSOrientation_Random
    elif altStarts == 'inertialAtHeavyAtoms':
        orientation = oefastrocs.OEFastROCSOrientation_InertialAtHeavyAtoms
    elif altStarts == 'inertialAtColorAtoms':
        orientation = oefastrocs.OEFastROCSOrientation_InertialAtColorAtoms
    elif altStarts == 'subrocs':
        orientation = oefastrocs.OEFastROCSOrientation_Subrocs
    else:
        orientation = oefastrocs.OEFastROCSOrientation_Inertial
    return orientation


def GetAltStartsString(altStarts):
    """ Translate an OEFastROCSOrientation constant back into its
    name, the inverse of GetStartType. Unrecognized values give
    'inertial'. """
    if altStarts == oefastrocs.OEFastROCSOrientation_Random:
        label = 'random'
    elif altStarts == oefastrocs.OEFastROCSOrientation_InertialAtHeavyAtoms:
        label = 'inertialAtHeavyAtoms'
    elif altStarts == oefastrocs.OEFastROCSOrientation_InertialAtColorAtoms:
        label = 'inertialAtColorAtoms'
    elif altStarts == oefastrocs.OEFastROCSOrientation_Subrocs:
        label = 'subrocs'
    else:
        label = 'inertial'
    return label


def GetShapeDatabaseArgs(itf):
    """ Build the argument tuple used to construct the OEShapeDatabase.

    When -chemff names a '.cff' file the tuple holds one
    OEColorForceField read from that file. Otherwise it holds the
    database type and the built-in force field constant looked up by
    name in OECOLOR_FORCEFIELDS.

    Combining -shapeOnly with an explicit -chemff is reported as fatal.
    """
    wantsShapeOnly = itf.GetBool("-shapeOnly")
    if wantsShapeOnly:
        if itf.GetParameter("-chemff").GetHasValue():
            oechem.OEThrow.Fatal("Unable to specify -shapeOnly and -chemff at the same time!")

    ffSpec = itf.GetString("-chemff")
    if ffSpec.endswith(".cff"):
        # given a .cff file, use that to construct a OEColorForceField
        assert not wantsShapeOnly
        forceField = oeshape.OEColorForceField()
        loaded = forceField.Init(ffSpec)
        if not loaded:
            oechem.OEThrow.Fatal("Unable to read color force field from '%s'" % ffSpec)

        return (forceField,)

    dbType = GetDatabaseType(wantsShapeOnly)
    return (dbType, OECOLOR_FORCEFIELDS[ffSpec])


def ReadShapeQuery(querymolstr):
    """ Parse querymolstr as a shape query and return the resulting
    OEShapeQueryPublic object.

    Raises ValueError when no shape query can be read from the data.
    """
    stream = oechem.oeisstream(querymolstr)
    shapeQuery = oeshape.OEShapeQueryPublic()

    if oeshape.OEReadShapeQuery(stream, shapeQuery):
        return shapeQuery

    raise ValueError("Unable to read a shape query from the data string")


class MCMolShapeDatabase:
    """ Maintains a database of MCMols that can be queried by shape
    similarity.

    The database file is loaded by a background DatabaseLoaderThread
    started in the constructor; queries wait for that load to finish."""
    def __init__(self, itf):
        """ Create a MCMolShapeDatabase from the parameters specified by the OEInterface.

        Reads -dbase, -shapeOnly and (through GetShapeDatabaseArgs) -chemff
        from itf. Gzipped database files are rejected as fatal.
        """
        self.rwlock = ReadWriteLock()
        # set by the loader thread once the database is in memory
        self.loadedEvent = Event()

        self.dbname = itf.GetString("-dbase")
        if oechem.OEIsGZip(self.dbname):
            oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped. "
                                 "Preferred formats are .oeb, .sdf or .oez" % self.dbname)

        self.moldb = oechem.OEMolDatabase()

        self.dbtype = GetDatabaseType(itf.GetBool("-shapeOnly"))
        self.shapedb = oefastrocs.OEShapeDatabase(*GetShapeDatabaseArgs(itf))

        # this thread is daemonic so a KeyboardInterrupt
        # during the load will cancel the process
        self.loaderThread = DatabaseLoaderThread(self.shapedb,
                                                 self.moldb,
                                                 self.dbname,
                                                 self.loadedEvent)
        self.loaderThread.daemon = True
        self.loaderThread.start()

    def IsLoaded(self, blocking=False):
        """ Return whether the server has finished loading.

        With blocking set to True the call waits for the loaded event
        first. Once loading is finished the loader thread is joined and
        dropped.
        """
        if blocking:
            self.loadedEvent.wait()

        # clean up the load waiter thread if it's still there
        if self.loadedEvent.is_set() and self.loaderThread is not None:
            self.rwlock.AcquireWriteLock()
            try:  # typical double checked locking
                if self.loaderThread is not None:
                    self.loaderThread.join()
                    self.loaderThread = None
            finally:
                self.rwlock.ReleaseWriteLock()

        return self.loadedEvent.is_set()

    def GetBestOverlays(self, querymolstr, options, iformat, oformat):
        """ Return a string of the format specified by 'oformat'
        containing the overlaid conformers found using querymolstr as
        the query interpreted as iformat.

        querymolstr - a string containing a molecule to use as the query
        options - an instance of OEShapeDatabaseOptions
        iformat - a string representing the file extension to parse the querymolstr as.
                  Note: old clients could be passing .sq files, so
                  iformat == '.oeb' will try to interpret the file as
                  a .sq file.
        oformat - file format to write the results as

        Raises ValueError when the query cannot be read or either
        stream cannot be set up.
        """
        # NOTE(review): this timer is created before waiting for the load
        # and parsing the query, so the "seconds to do search" figure below
        # appears to include that time as well - confirm this is intended
        timer = oechem.OEWallTimer()

        # make sure to wait for the load to finish
        blocking = True
        loaded = self.IsLoaded(blocking)
        assert loaded

        if iformat.startswith(".sq"):
            query = ReadShapeQuery(querymolstr)
        else:
            # read in query
            qfs = oechem.oemolistream()
            qfs = SetupStream(qfs, iformat)
            if not qfs.openstring(querymolstr):
                raise ValueError("Unable to open input molecule string")

            query = oechem.OEGraphMol()
            if not oechem.OEReadMolecule(qfs, query):
                if iformat == ".oeb":  # could be an old client trying to send a .sq file.
                    query = ReadShapeQuery(querymolstr)
                else:
                    raise ValueError("Unable to read a molecule from the string of format '%s'"
                                     % iformat)

        # results are written to an in-memory string stream
        ofs = oechem.oemolostream()
        ofs = SetupStream(ofs, oformat)
        if not ofs.openstring():
            raise ValueError("Unable to openstring for output")

        # do we only want shape based results?

        # this is a "Write" lock to be paranoid and not overload the GPU
        self.rwlock.AcquireWriteLock()
        try:
            # do search
            scores = self.shapedb.GetSortedScores(query, options)
            sys.stderr.write("%f seconds to do search\n" % timer.Elapsed())
        finally:
            self.rwlock.ReleaseWriteLock()

        # restart the timer to measure writing the hit list separately
        timer.Start()
        # write results
        for score in scores:
            mcmol = oechem.OEMol()
            if not self.moldb.GetMolecule(mcmol, score.GetMolIdx()):
                oechem.OEThrow.Warning("Can't retrieve molecule %i from the OEMolDatabase, "
                                       "skipping..." % score.GetMolIdx())
                continue
            # remove hydrogens to make output smaller, this also
            # ensures OEPrepareFastROCSMol will have the same output
            oechem.OESuppressHydrogens(mcmol)

            # keep only the conformer that produced this score
            mol = oechem.OEGraphMol(mcmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
            oechem.OECopySDData(mol, mcmol)

            # tag the hit with the scores of the similarity function in use
            if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
                oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
                oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
                oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            else:
                oechem.OESetSDData(mol, "ShapeTversky", "%.4f" % score.GetShapeTversky())
                oechem.OESetSDData(mol, "ColorTversky", "%.4f" % score.GetColorTversky())
                oechem.OESetSDData(mol, "TverskyCombo", "%.4f" % score.GetTverskyCombo())

            # record the starting orientation only when it is not the inertial one
            if options.GetInitialOrientation() != oefastrocs.OEFastROCSOrientation_Inertial:
                oechem.OEAddSDData(mol, "Opt. Starting Pos.",
                                   GetAltStartsString(options.GetInitialOrientation()))

            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        output = ofs.GetString()
        sys.stderr.write("%f seconds to write hitlist\n" % timer.Elapsed())
        sys.stderr.flush()
        ofs.close()

        return output

    def GetName(self):
        """ Return the database name, read under the read lock. """
        self.rwlock.AcquireReadLock()
        try:
            return self.dbname
        finally:
            self.rwlock.ReleaseReadLock()

    def SetName(self, name):
        """ Replace the stored database name under the write lock. Only
        the name attribute changes; nothing is reloaded here. """
        self.rwlock.AcquireWriteLock()
        try:
            self.dbname = name
        finally:
            self.rwlock.ReleaseWriteLock()


class ShapeQueryServer:
    """ The object registered with the XMLRPC server; its public
    methods are what remote clients call. """

    def __init__(self, itf):
        """ Build the shape database and the query thread pool from
        the parsed command line interface itf. """
        database = MCMolShapeDatabase(itf)
        self.shapedb = database
        self.thdpool = ShapeQueryThreadPool(database)
        self.itf = itf

    def IsLoaded(self, blocking=False):
        """ Forward to the database's IsLoaded and return its answer. """
        return self.shapedb.IsLoaded(blocking)

    def GetBestOverlays(self, querymolstr, nhits, iformat=".oeb", oformat=".oeb"):
        """ Run the query synchronously on the database and return the
        result wrapped in an XMLRPC Binary. """
        payload = querymolstr.data
        return Binary(self.shapedb.GetBestOverlays(payload, nhits, iformat, oformat))

    def SubmitQuery(self, querymolstr, nhits, iformat=".oeb", oformat=".oeb", kwargs=None):
        """ Hand the query to the thread pool and return whatever
        index the pool gives back; that index is what QueryStatus,
        QueryHistogram and QueryResults expect. """
        params = kwargs or {}
        # a server started with -shapeOnly forces shape only scoring
        if self.itf.GetBool("-shapeOnly"):
            params['shapeOnly'] = True

        payload = querymolstr.data
        return self.thdpool.SubmitQuery(payload, nhits, iformat, oformat, params)

    def QueryStatus(self, queryidx, blocking=False):
        """ Forward to the thread pool's QueryStatus for queryidx and
        return its answer unchanged. """
        return self.thdpool.QueryStatus(queryidx, blocking)

    def QueryHistogram(self, queryidx):
        """ Forward to the thread pool's QueryHistogram for queryidx. """
        return self.thdpool.QueryHistogram(queryidx)

    def QueryResults(self, queryidx):
        """ Fetch the results of queryidx from the thread pool and
        return them wrapped in an XMLRPC Binary. """
        return Binary(self.thdpool.QueryResults(queryidx))

    def GetVersion(self):
        """ Report the FastROCS release string of this server. """
        return oefastrocs.OEFastROCSGetRelease()

    def OEThrowSetLevel(self, level):
        """ Pass the requested message level on to the thread pool
        and return its answer. """
        return self.thdpool.SetLevel(level)

    def GetName(self):
        """ Return the name the database currently reports. """
        return self.shapedb.GetName()

    def SetName(self, name):
        """ Give the database a custom name; always returns True. """
        self.shapedb.SetName(name)
        return True


# Restrict to a particular path.
class RequestHandler(SimpleXMLRPCRequestHandler):
    """ Request handler whose only accepted RPC path is /RPC2. """
    rpc_paths = ("/RPC2",)


class AsyncXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer):
    """ SimpleXMLRPCServer combined with ThreadingMixIn. """
    allow_reuse_address = True
    # if a shutdown request occurs through a signal force everything to terminate immediately
    daemon_threads = True


# Command line definition handed to oechem.OEInterface in main(). It
# declares the database file (-dbase, keyless 1), the listening port
# (-port, keyless 2), the bind address (-hostname), the shape only
# switch (-shapeOnly) and the color force field selection (-chemff).
InterfaceData = """\
!BRIEF [-shapeOnly | -chemff <color forcefield>] [-hostname] [-dbase] database [[-port] 8080]
!PARAMETER -dbase
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to serve
  !KEYLESS 1
!END
!PARAMETER -port
  !TYPE int
  !REQUIRED false
  !BRIEF Port number to start the XML RPC server on
  !DEFAULT 8080
  !KEYLESS 2
!END
!PARAMETER -hostname
  !TYPE string
  !DEFAULT 0.0.0.0
  !BRIEF Name of the server to bind to
!END
!PARAMETER -shapeOnly
  !ALIAS -s
  !TYPE bool
  !DEFAULT false
  !BRIEF Run FastROCS server in shape only mode, clients can also control this separately
!END
!PARAMETER -chemff
  !TYPE string
  !LEGAL_VALUE ImplicitMillsDean
  !LEGAL_VALUE ImplicitMillsDeanNoRings
  !LEGAL_VALUE ExplicitMillsDean
  !LEGAL_VALUE ExplicitMillsDeanNoRings
  !LEGAL_VALUE *.cff
  !DEFAULT ImplicitMillsDean
  !BRIEF Chemical force field. Either a constant or a filename.
!END
"""


def main(argv=[__name__]):
    """ Start the XMLRPC shape database server described by the
    command line in argv and serve requests until interrupted.

    Returns 0 once the serving loop has ended normally.
    """
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Fatal("No supported GPU available to run FastROCS TK!")

    itf = oechem.OEInterface(InterfaceData, argv)

    # -hostname defaults to 0.0.0.0 (accept connections on any
    # hostname) and -port defaults to 8080
    bindaddr = (itf.GetString("-hostname"), itf.GetInt("-port"))
    server = AsyncXMLRPCServer(bindaddr,
                               requestHandler=RequestHandler,
                               logRequests=False)

    # report the address the socket was really bound to; the wildcard
    # address is replaced by this machine's host name for display
    hostname, portnumber = server.socket.getsockname()
    if hostname == "0.0.0.0":
        hostname = socket.gethostname()
    endpoint = (hostname, portnumber)
    sys.stderr.write("Listening for ShapeDatabaseClient.py requests on %s:%i\n\n" % endpoint)
    sys.stderr.write("Example: ShapeDatabaseClient.py %s:%i query.sdf hit.sdf\n\n" % endpoint)

    # expose the introspection calls and the query methods over XMLRPC
    server.register_introspection_functions()
    server.register_instance(ShapeQueryServer(itf))

    try:
        # blocks inside the server's main loop
        server.serve_forever()
    finally:
        server.server_close()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 19: Report whether the database served at server:port has finished loading.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys
import socket

try:
    from xmlrpclib import ServerProxy
except ImportError:  # python 3
    from xmlrpc.client import ServerProxy

from time import sleep

from openeye import oechem

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

# Command line definition handed to oechem.OEInterface in main(). It
# declares the server to poll (-host, keyless 1), whether to wait for
# loading to finish (-blocking) and the connection retry policy
# (-retries, -timeout).
InterfaceData = """\
!BRIEF [-blocking] [-h] <server:port>
!PARAMETER -host
  !ALIAS -h
  !TYPE string
  !REQUIRED true
  !BRIEF The host to check to see if it is up yet
  !KEYLESS 1
!END
!PARAMETER -blocking
  !ALIAS -b
  !TYPE bool
  !DEFAULT false
  !BRIEF If true the program will not exit until the database has finished loading.
!END
!PARAMETER -retries
  !ALIAS -r
  !TYPE int
  !DEFAULT 10
  !BRIEF Number of times to try connecting to the server.
!END
!PARAMETER -timeout
  !ALIAS -t
  !TYPE float
  !DEFAULT 60.0
  !BRIEF The time between retries is the timeout divided by the number of retries.
!END
"""


def main(argv=[__name__]):
    """ Ask the server named by -host whether its database is loaded.

    The connection is attempted up to -retries times, sleeping
    -timeout / -retries seconds between attempts.

    Returns 0 when the server reports the database as loaded, 1 when
    it reports it as not loaded, and -1 when no connection could be
    made at all.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    host = itf.GetString("-host")
    s = ServerProxy("http://" + host)

    blocking = itf.GetBool("-blocking")
    retries = itf.GetInt("-retries")
    if retries < 1:
        oechem.OEThrow.Fatal("-retries must be greater than 0")
    timeout = itf.GetFloat("-timeout")
    if timeout <= 0.0:
        oechem.OEThrow.Fatal("-timeout must be greater than 0.0")
    waittime = timeout/retries

    loaded = False
    while retries:
        try:
            loaded = bool(s.IsLoaded(blocking))
            break
        except socket.error:
            retries -= 1
            if retries:
                print("Unable to connect to %s, retrying in %2.1f seconds" % (host, waittime))
                sleep(waittime)

    # retries only reaches zero when every attempt failed to connect
    if not retries:
        print("Was never able to connect to a server, exiting...")
        return -1

    # keep the boolean itself for the exit status; converting it to a
    # (always truthy) string first would make every run return 0
    print(host, "IsLoaded =", "True" if loaded else "False")

    return 0 if loaded else 1


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 20: Adjust the verbosity of server running on server:port.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

try:
    from xmlrpclib import ServerProxy
except ImportError:  # python 3
    from xmlrpc.client import ServerProxy

from openeye import oechem

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

# Command line definition handed to oechem.OEInterface in main(). It
# declares the server to contact (-host, keyless 1) and one boolean
# flag per error level; main() requires exactly one flag to be set.
InterfaceData = """\
!BRIEF (-debug|-verbose|-info|-warning|-error) [-h] <server:port>
!PARAMETER -host
  !ALIAS -h
  !TYPE string
  !REQUIRED true
  !BRIEF The host whose verbosity level will be changed
  !KEYLESS 1
!END
!PARAMETER -debug
  !ALIAS -d
  !TYPE bool
  !DEFAULT false
  !BRIEF Debug error level
!END
!PARAMETER -verbose
  !ALIAS -v
  !TYPE bool
  !DEFAULT false
  !BRIEF Verbose error level
!END
!PARAMETER -info
  !ALIAS -i
  !TYPE bool
  !DEFAULT false
  !BRIEF Info error level
!END
!PARAMETER -warning
  !ALIAS -w
  !TYPE bool
  !DEFAULT false
  !BRIEF Warning error level
!END
!PARAMETER -error
  !ALIAS -e
  !TYPE bool
  !DEFAULT false
  !BRIEF Unrecoverable error level
!END
"""


def main(argv=[__name__]):
    """ Change the message level of the server named by -host.

    Exactly one of -debug, -verbose, -info, -warning or -error must
    be given; any other combination is reported through
    oechem.OEThrow.Fatal.

    Returns 0 when the server acknowledges the new level and 1 when
    it reports failure, so callers can detect the failure from the
    exit status and not only from the printed message.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    # flag -> (level constant sent to the server, name used in messages)
    levels = {"-debug": (oechem.OEErrorLevel_Debug,   "oechem.OEErrorLevel_Debug"),
              "-verbose": (oechem.OEErrorLevel_Verbose, "oechem.OEErrorLevel_Verbose"),
              "-info": (oechem.OEErrorLevel_Info,    "oechem.OEErrorLevel_Info"),
              "-warning": (oechem.OEErrorLevel_Warning, "oechem.OEErrorLevel_Warning"),
              "-error": (oechem.OEErrorLevel_Error,   "oechem.OEErrorLevel_Error")}

    onFlags = [key for key in levels if itf.GetBool(key)]
    if not onFlags:
        oechem.OEThrow.Fatal("Need specify exactly one error level: " +
                             "|".join(levels.keys()))
    elif len(onFlags) > 1:
        oechem.OEThrow.Fatal("This flags are mutually exclusive: " +
                             "|".join(onFlags))

    level, name = levels[onFlags[0]]

    s = ServerProxy("http://" + itf.GetString("-host"))
    if not s.OEThrowSetLevel(level):
        print("oechem.OEThrow.SetLevel(" + name + ") failed")
        return 1

    print("oechem.OEThrow.SetLevel(" + name + ") successful")
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 21: Prepare OEB file for faster load performance.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# Cache as much as possible on the molecule to improve the performance
# of starting a server from scratch. Also cull to desired number of
# conformers if requested.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

# Command line definition handed to oechem.OEInterface in main(). It
# declares the input and output database files (-in, -out), the
# per-molecule conformer cap (-maxConfs) and the coordinate precision
# switch (-storeFloat).
InterfaceData = """\
!BRIEF [-maxConfs 10] [-storeFloat] [-in] <database.oeb> [-out] <database.oeb>
!PARAMETER -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to prep
  !KEYLESS 1
!END
!PARAMETER -out
  !TYPE string
  !REQUIRED true
  !BRIEF Output prepared database
  !KEYLESS 2
!END
!PARAMETER -maxConfs
  !ALIAS -mc
  !TYPE int
  !DEFAULT 10
  !REQUIRED false
  !BRIEF Maximum conformers per molecule
!END
!PARAMETER -storeFloat
  !ALIAS -sf
  !TYPE bool
  !DEFAULT false
  !REQUIRED false
  !BRIEF Store as full float precision in output file else store as half float (default)
!END
"""


def TrimConformers(mol, maxConfs):
    """ Delete every conformer of mol that comes after the first
    maxConfs conformers yielded by mol.GetConfs(). """
    for position, conformer in enumerate(mol.GetConfs()):
        if position < maxConfs:
            # still within the quota, keep this one
            continue
        mol.DeleteConf(conformer)


def main(argv=[__name__]):
    """ Prepare the database named by -in for FastROCS and write it,
    followed by a molecule database index, to the file named by -out.

    Each molecule is cut down to at most -maxConfs conformers, passed
    through oefastrocs.OEPrepareFastROCSMol and written either as is
    (-storeFloat) or converted to half float Cartesian storage.

    Returns 0 on success; every failure is reported through
    oechem.OEThrow.Fatal.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    inname = itf.GetString("-in")
    outname = itf.GetString("-out")

    # validate the arguments before any file is opened, so a rejected
    # run does not leave an empty output file behind
    if outname.endswith('.gz'):
        oechem.OEThrow.Fatal("Output file must not gzipped")

    maxConfs = itf.GetInt("-maxConfs")
    if maxConfs < 1:
        oechem.OEThrow.Fatal("Illegal number of conformer requested %d" % maxConfs)

    storeFloat = itf.GetBool("-storeFloat")

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    oechem.OEPreserveRotCompress(ifs)
    if not ifs.open(inname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % inname)

    # output - use PRE-compress for smaller files (no need to .gz the file)
    ofs = oechem.oemolostream()
    oechem.OEPRECompress(ofs)
    if not ofs.open(outname):
        oechem.OEThrow.Fatal("Unable to open '%s' for writing" % outname)

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        TrimConformers(mol, maxConfs)

        oefastrocs.OEPrepareFastROCSMol(mol)
        if storeFloat:
            oechem.OEWriteMolecule(ofs, mol)
        else:
            halfMol = oechem.OEMol(mol, oechem.OEMCMolType_HalfFloatCartesian)
            oechem.OEWriteMolecule(ofs, halfMol)

        dots.Update()

    dots.Total()
    ifs.close()
    # the output must be closed before it can be indexed
    ofs.close()

    print("Indexing %s" % outname)
    if not oechem.OECreateMolDatabaseIdx(outname):
        oechem.OEThrow.Fatal("Failed to index %s" % outname)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 22: Tie multiple servers to appear as a single server.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import sys
import os

try:
    from xmlrpclib import ServerProxy, Binary
    from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
except ImportError:  # python 3
    from xmlrpc.client import ServerProxy, Binary
    from xmlrpc.server import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler

from threading import Thread
from threading import Lock
from ShapeDatabaseServer import SetupStream

from openeye import oechem

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


class ShapeServer:
    """ One query running on one remote shape database server. """

    def __init__(self, servername, querydata, nhits, iformat, oformat, kwargs):
        """ Connect to http://servername and submit querydata there,
        remembering the query index the server hands back. """
        proxy = ServerProxy("http://" + servername)
        self.server = proxy
        self.queryidx = proxy.SubmitQuery(querydata, nhits, iformat, oformat, kwargs)

    def QueryStatus(self, blocking):
        """ Return the (current, total) progress pair of the query. """
        done, outof = self.server.QueryStatus(self.queryidx, blocking)

        # a total of zero means the tracer on the server has not been
        # initialized yet, keep asking in blocking mode until it is
        while outof == 0:
            done, outof = self.server.QueryStatus(self.queryidx, True)

        return done, outof

    def QueryHistogram(self):
        """ Fetch the score histogram of the query from the server. """
        return self.server.QueryHistogram(self.queryidx)

    def QueryResults(self):
        """ Fetch the results of the query from the server. """
        return self.server.QueryResults(self.queryidx)


class ShapeServerPool:
    """ Abstract a collection of ShapeServer to appear as a single
    server."""

    def __init__(self, servernames, querymolstr, nhits, iformat, oformat, kwargs):
        """ Create one ShapeServer per entry of servernames, each
        created on its own thread with the same query arguments, and
        keep them in the order of servernames. """
        self.nhits = nhits
        self.oformat = oformat

        thrdpool = LaunchFunctionThreadPool(ShapeServer)

        for sname in servernames:
            thrdpool.AddThread(sname, querymolstr, nhits, iformat, oformat, kwargs)

        self.shapeservers = list(thrdpool.GetResults())

    def QueryStatus(self, blocking):
        """ Return (current, total) summed over all servers. Each
        server's own pair is also logged to stderr, one per line. """
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryStatus)

        for server in self.shapeservers:
            thrdpool.AddThread(server, blocking)

        current = 0
        total = 0
        for scur, stot in thrdpool.GetResults():
            sys.stderr.write("%i/%i\n" % (scur, stot))
            current += scur
            total += stot

        return current, total

    def QueryHistogram(self):
        """ Return the element-wise sum of the histograms of all
        servers, or None when there are no servers. """
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryHistogram)

        for server in self.shapeservers:
            thrdpool.AddThread(server)

        totalHist = None
        for hist in thrdpool.GetResults():
            if totalHist is None:
                # the first histogram fixes the number of bins
                totalHist = [0] * len(hist)

            totalHist = [lhs + rhs for lhs, rhs in zip(totalHist, hist)]

        return totalHist

    def QueryResults(self):
        """ Collect the hits of every server, sort them by score from
        best to worst and return the best nhits of them as a Binary
        in the oformat format.

        An empty string is returned when no server sent any data or
        when the combined data could not be opened for reading.
        """
        timer = oechem.OEWallTimer()
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryResults)

        for server in self.shapeservers:
            thrdpool.AddThread(server)

        data = [oebdata.data for oebdata in thrdpool.GetResults()]

        sys.stderr.write("%f seconds to get results back\n" % timer.Elapsed())

        data = b"".join(data)
        if not data:
            sys.stderr.write("Possible query error, no data returned "
                             "by any of the downstream servers\n")
            return ""

        timer.Start()
        # read the concatenated server replies back in as molecules
        ifs = oechem.oemolistream()
        ifs = SetupStream(ifs, self.oformat)
        if not ifs.openstring(data):
            sys.stderr.write("Unable to open OEB string from downstream server\n")
            return ""

        mols = [oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()]

        def GetScoreToCmp(mol):
            """ Score used for ranking: the combo score when the hit
            carries one, else the shape score of the same family. """
            if oechem.OEHasSDData(mol, "ShapeTanimoto"):
                # Tanimoto scored hit
                if oechem.OEHasSDData(mol, "TanimotoCombo"):
                    return float(oechem.OEGetSDData(mol, "TanimotoCombo"))
                return float(oechem.OEGetSDData(mol, "ShapeTanimoto"))
            else:
                # Tversky scored hit
                if oechem.OEHasSDData(mol, "TverskyCombo"):
                    return float(oechem.OEGetSDData(mol, "TverskyCombo"))
                return float(oechem.OEGetSDData(mol, "ShapeTversky"))

        # ascending sort followed by a reversal puts the best score first
        mols.sort(key=GetScoreToCmp)
        mols.reverse()

        # write the best hits back out to a string
        ofs = oechem.oemolostream()
        ofs = SetupStream(ofs, self.oformat)
        ofs.openstring()

        # a falsy nhits means "return every hit"
        nhits = self.nhits
        if not nhits:
            nhits = len(mols)

        for mol in mols[:nhits]:
            oechem.OEWriteMolecule(ofs, mol)

        sys.stderr.write("%f seconds to collate hitlist\n" % timer.Elapsed())

        return Binary(ofs.GetString())


class LaunchFunctionThread(Thread):
    """ Thread that calls func(*args) and keeps either the value it
    returned or the exception it raised for later retrieval."""

    def __init__(self, func, *args):
        super(LaunchFunctionThread, self).__init__()
        self.func = func
        self.args = args

    def run(self):
        """ Call the function, recording its outcome on the thread. """
        try:
            outcome = self.func(*self.args)
        except Exception as err:
            self.exception = err
        else:
            self.result = outcome

    def GetResult(self):
        """ Return the recorded return value, or re-raise the
        exception recorded by run(). """
        failure = getattr(self, "exception", None)
        if failure is not None:
            raise failure
        return self.result


class LaunchFunctionThreadPool:
    """ Runs one function on several threads, each thread with its
    own argument list."""

    def __init__(self, func):
        """ Remember func; no thread is started yet. """
        self.threads = []
        self.func = func

    def AddThread(self, *args):
        """ Start a new thread calling func(*args) and keep track of it. """
        worker = LaunchFunctionThread(self.func, *args)
        worker.start()
        self.threads.append(worker)

    def GetResults(self):
        """ Lazily yield the result of every thread, in the order the
        threads were added; each thread is joined right before its
        result is produced."""
        for worker in self.threads:
            worker.join()
            yield worker.GetResult()


def ShapeServerIsLoaded(servername, blocking):
    """ Ask the server at http://servername for its IsLoaded(blocking)
    answer and return it. """
    return ServerProxy("http://" + servername).IsLoaded(blocking)


class ShapeServerProxy:
    """ Fans each query out to several remote shape servers and
    answers for all of them."""

    def __init__(self, servernames):
        """ Remember the downstream servers; nothing is contacted yet. """
        self.servernames = servernames
        self.queryidx = 0
        self.activequeries = {}
        self.lock = Lock()

    def _ActiveQuery(self, queryidx):
        """ Look up the server pool of queryidx under the lock. """
        with self.lock:
            return self.activequeries[queryidx]

    def IsLoaded(self, blocking=False):
        """ Combine the IsLoaded answers of all downstream servers
        with a logical and. """
        pool = LaunchFunctionThreadPool(ShapeServerIsLoaded)

        for name in self.servernames:
            pool.AddThread(name, blocking)

        verdict = True
        for answer in pool.GetResults():
            verdict = verdict and answer

        return verdict

    def SubmitQuery(self, querymolstr, nhits, iformat=".oeb", oformat=".oeb", kwargs=None):
        """ Launch the query on every downstream server and return
        the index under which it was registered here. """
        pool = ShapeServerPool(self.servernames, querymolstr,
                               nhits, iformat, oformat, kwargs or {})

        # index allocation and registration happen under the lock
        with self.lock:
            idx = self.queryidx
            self.queryidx = idx + 1
            self.activequeries[idx] = pool

        return idx

    def QueryStatus(self, queryidx, blocking=False):
        """ Report the status of the query registered as queryidx. """
        return self._ActiveQuery(queryidx).QueryStatus(blocking)

    def QueryHistogram(self, queryidx):
        """ Report the current score histogram of the query
        registered as queryidx. """
        return self._ActiveQuery(queryidx).QueryHistogram()

    def QueryResults(self, queryidx):
        """ Return the results of the query registered as queryidx
        and drop it from the active queries. """
        with self.lock:
            pool = self.activequeries.pop(queryidx)

        return pool.QueryResults()


# Restrict to a particular path.
class RequestHandler(SimpleXMLRPCRequestHandler):
    """ Request handler whose only accepted RPC path is /RPC2. """
    rpc_paths = ("/RPC2",)


def main(argv=[__name__]):
    """ Start an XMLRPC server that proxies queries to the shape
    servers named on the command line.

    argv holds the program name, one or more downstream server
    names and optionally a trailing port number (default 8080). A
    trailing argument that parses as an integer is taken as the
    port; anything else is treated as another server name.

    Returns 0 once the serving loop has ended normally, 1 when no
    downstream server was named.
    """
    usage = "%s <server 1> <server 2> ... <server n> [portnumber=8080]" % argv[0]
    if len(argv) < 2:
        oechem.OEThrow.Usage(usage)

    # default port number is 8080
    portnumber = 8080
    try:
        portnumber = int(argv[-1])
        servernames = argv[1:-1]
    except ValueError:
        servernames = argv[1:]

    # a lone port number leaves nothing to proxy to; refuse to start
    # instead of serving answers computed over zero servers
    if not servernames:
        oechem.OEThrow.Usage(usage)
        return 1

    # Create server, an empty string is used to allow connections with
    # any hostname
    server = SimpleXMLRPCServer(("", portnumber),
                                requestHandler=RequestHandler)
    server.register_introspection_functions()

    server.register_instance(ShapeServerProxy(servernames))

    try:
        # Run the server's main loop
        server.serve_forever()
    finally:
        server.server_close()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 23: Calculate the distance between every pair of molecules in a database.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# Write out a csv file of the similarity matrix of a multi-conformer
# database. Note, all conformers will be compared to each other,
# however, only the best match will be reported between two molecules.

import sys
import os
import csv

from openeye import oechem
from openeye import oefastrocs

oepy = os.path.join(os.path.dirname(__file__), "openeye", "python")
sys.path.insert(0, os.path.realpath(oepy))

# Command line definition handed to oechem.OEInterface in main(). It
# declares the input database (-dbase, keyless 1), the csv file to
# write (-matrix, keyless 2) and the shape only switch (-shapeOnly).
InterfaceData = """\
!BRIEF [-shapeOnly] [-dbase] <database> [-matrix] <clusters.csv>
!PARAMETER -dbase
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to select from
  !KEYLESS 1
!END
!PARAMETER -matrix
  !TYPE string
  !REQUIRED true
  !BRIEF csv file to write similarity matrix to
  !KEYLESS 2
!END
!PARAMETER -shapeOnly
  !ALIAS -s
  !TYPE bool
  !DEFAULT false
  !BRIEF Run FastROCS in shape only mode.
!END
"""


def GetScoreGetter(shapeOnly=False):
    """ Return the OEShapeDatabaseScore accessor matching the scoring
    mode: GetShapeTanimoto when shapeOnly is true, GetTanimotoCombo
    otherwise (the same pairing main() uses for -shapeOnly). """
    if shapeOnly:
        return oefastrocs.OEShapeDatabaseScore.GetShapeTanimoto
    return oefastrocs.OEShapeDatabaseScore.GetTanimotoCombo


def _CollectBestScores(ifs, shapedb, options, getter):
    """ Score every molecule read from ifs against the molecules read
    before it. Returns (titles, lmat) where lmat[i][j], for j < i,
    is the best score over all conformers of molecule i against
    molecule j. """
    lmat = [[]]
    titles = []
    for mol in ifs.GetOEMols():
        if titles:
            bestscores = [0.0] * len(titles)
            for conf in mol.GetConfs():
                for score in shapedb.GetScores(conf, options):
                    midx = score.GetMolIdx()
                    bestscores[midx] = max(bestscores[midx], getter(score))

            lmat.append(bestscores)

        # only now add the molecule, so it is never scored against itself
        shapedb.AddMol(mol)

        title = mol.GetTitle()
        if not title:
            # untitled molecules are named by their 1-based position
            title = str(len(titles) + 1)
        titles.append(title)

    return titles, lmat


def _WriteDistanceMatrix(csvwriter, titles, lmat, maxscore):
    """ Write the title row followed by one row per molecule holding
    its 1-based index and its distance (maxscore - score, 0.0 on the
    diagonal) to every molecule. """
    csvwriter.writerow(titles)
    nrows = len(titles)
    for i in range(nrows):
        row = [i+1]
        for j in range(nrows):
            if j == i:
                val = 0.0
            else:
                # lmat is lower triangular: the row is the later molecule
                val = maxscore - lmat[max(i, j)][min(i, j)]

            row.append("%.3f" % val)

        csvwriter.writerow(row)


def main(argv=[__name__]):
    """ Write the distance matrix of the database named by -dbase to
    the csv file named by -matrix.

    Distances are 2.0 - TanimotoCombo, or 1.0 - ShapeTanimoto with
    -shapeOnly. The csv file is opened exactly once, before the
    scores are computed, and is closed when writing is done; the
    original opened it twice and never closed either handle, so a
    stale header row could end up in the final file.

    Returns 0 on success.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    ifs = oechem.oemolistream()
    dbname = itf.GetString("-dbase")
    if oechem.OEIsGZip(dbname):
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % dbname)

    if itf.GetBool("-shapeOnly"):
        getter = oefastrocs.OEShapeDatabaseScore.GetShapeTanimoto
        dbtype = oefastrocs.OEShapeDatabaseType_Shape
        maxscore = 1.0
    else:
        getter = oefastrocs.OEShapeDatabaseScore.GetTanimotoCombo
        dbtype = oefastrocs.OEShapeDatabaseType_Default
        maxscore = 2.0

    shapedb = oefastrocs.OEShapeDatabase(dbtype)
    options = oefastrocs.OEShapeDatabaseOptions()
    options.SetScoreType(dbtype)

    # opening the output first keeps an unwritable path failing
    # before the scores are computed
    with open(itf.GetString("-matrix"), 'w') as matrixfile:
        titles, lmat = _CollectBestScores(ifs, shapedb, options, getter)
        _WriteDistanceMatrix(csv.writer(matrixfile), titles, lmat, maxscore)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Listing 24: Database searching with queries using the user inertial starts orientation.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys

from openeye import oechem
from openeye import oefastrocs

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))


def main(argv=[__name__]):
    """Search a shape database with one query using user inertial starts.

    The first molecule of the query file is searched against the database
    with the initial orientation set to
    ``OEFastROCSOrientation_UserInertialStarts``; the single user start is
    the position of the query atom stored under index 1 in
    ``query.GetCoords()``. The query, followed by every hit returned by the
    search (limit set to 5), is written to the hits file. Each hit is tagged
    with ``ShapeTanimoto``, ``ColorTanimoto`` and ``TanimotoCombo`` SD data
    and transformed with ``score.Transform`` before being written.

    Args:
        argv: Command line as ``[prog, <database>, <queries>, <hits.oeb>]``.

    Returns:
        0 on every path that returns, including when too few arguments are
        given or when no supported GPU is available. Other failures are
        reported through ``oechem.OEThrow.Fatal``.
    """
    if len(argv) < 4:
        oechem.OEThrow.Usage("%s <database> <queries> <hits.oeb>" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    if oechem.OEIsGZip(dbname):
        # Format the file name into the message with %, as every other
        # OEThrow call in this script does, instead of passing it as a
        # separate argument.
        oechem.OEThrow.Fatal("%s is an unsupported database file format as it is gzipped.\n"
                             "Preferred formats are .oeb, .sdf or .oez" % dbname)

    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetInitialOrientation(oefastrocs.OEFastROCSOrientation_UserInertialStarts)

    opts.SetLimit(5)

    qfname = argv[2]
    # read in query (only the first molecule of the file is used)
    qfs = oechem.oemolistream()
    if not qfs.open(qfname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(qfs, query):
        oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
    # Nothing else is read from the query file, so release it now.
    qfs.close()

    # write out everything (the query first, then the hits) to the hits file
    ofs = oechem.oemolostream()
    if not ofs.open(argv[3]):
        oechem.OEThrow.Fatal("Unable to open '%s'" % argv[3])
    oechem.OEWriteMolecule(ofs, query)

    # Build the flat coordinate list of user starts: here a single start
    # taken from the coordinates of the query atom with index 1.
    startsCoords = oechem.OEFloatVector()
    atomIdx = 1
    xyz = query.GetCoords()[atomIdx]
    for x in xyz:
        startsCoords.append(x)
    # Each start contributes three values, so the length must be a multiple
    # of three.
    if len(startsCoords) % 3 != 0:
        oechem.OEThrow.Fatal("Something went wrong whilst reading in user-starts coordinates")

    opts.SetUserStarts(oechem.OEFloatVector(startsCoords), len(startsCoords) // 3)

    opts.SetMaxOverlays(opts.GetNumInertialStarts() * opts.GetNumUserStarts())

    if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_UserInertialStarts:
        numStarts = opts.GetNumUserStarts()
        print("This example will use %u starts" % numStarts)

    print("Searching for %s" % qfname)
    for score in dbase.GetSortedScores(query, opts):
        print("Score for mol %u(conf %u) %f shape %f color" % (
               score.GetMolIdx(), score.GetConfIdx(),
               score.GetShapeTanimoto(), score.GetColorTanimoto()))
        dbmol = oechem.OEMol()
        molidx = score.GetMolIdx()
        if not moldb.GetMolecule(dbmol, molidx):
            print("Unable to retrieve molecule '%u' from the database" % molidx)
            continue

        # Copy out only the conformer that produced this score, annotate it
        # with the scores and apply the score's transform before writing.
        mol = oechem.OEGraphMol(dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))
        oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
        oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
        oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
        score.Transform(mol)

        oechem.OEWriteMolecule(ofs, mol)

    # Close the hits stream explicitly before reporting that results were
    # written.
    ofs.close()
    print("Wrote results to %s" % argv[3])

    return 0


# Script entry point: run main() on the command-line arguments and use its
# return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)