FastROCS Examples

ShapeDatabaseServer

#!/usr/bin/env python
import sys, os
import socket
try:
    from SocketServer import ThreadingMixIn
except ImportError:
    from socketserver import ThreadingMixIn
from threading import Thread
from threading import Lock
from threading import Condition
from threading import Event

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
# very important that OEChem is in this mode since we are passing molecules between threads
OESetMemPoolMode(OEMemPoolMode_System)

from openeye.oeshape import *
try:
    from openeye.oefastrocs import *
except ImportError:
    OEThrow.Fatal("This script is not available, FastROCS is not supported on this platform.")

try:
    from xmlrpclib import Binary
    from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
except ImportError: # python 3
    from xmlrpc.client import Binary
    from xmlrpc.server import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler

class ReadWriteLock(object):
    """ Basic locking primitive that allows multiple readers but only
    a single writer at a time. Useful for synchronizing database
    updates. Priority is given to pending writers. """
    def __init__(self):
        self.cond = Condition()
        self.readers = 0
        self.writers = 0

    def AcquireReadLock(self):
        self.cond.acquire()
        try:
            while self.writers:
                self.cond.wait()

            self.readers += 1
            assert self.writers == 0
        finally:
            self.cond.notifyAll()
            self.cond.release()

    def ReleaseReadLock(self):
        self.cond.acquire()
        assert self.readers > 0
        try:
            self.readers -= 1
        finally:
            self.cond.notifyAll()
            self.cond.release()

    def AcquireWriteLock(self):
        self.cond.acquire()
        self.writers += 1
        while self.readers:
            self.cond.wait()

        assert self.readers == 0
        assert self.writers > 0

    def ReleaseWriteLock(self):
        assert self.readers == 0
        assert self.writers > 0

        self.writers -= 1
        self.cond.notifyAll()
        self.cond.release()

class ShapeQueryThread(Thread):
    """ A thread to run a query against a shape database """

    def __init__(self, shapedb, querymolstr, nhits, iformat, oformat, shapeOnly, errorLevel):
        """ Create a new thread to perform a query. The query doesn't
        execute until start is called.
        shapedb - database to run the query against

        See MCMolShapeDatabase.GetBestOverlays for a description of
        the querymolstr and nhits arguments.
        """
        Thread.__init__(self)
        self.shapedb = shapedb
        self.querymolstr = querymolstr
        self.iformat = iformat
        self.oformat = oformat
        self.scoretype = GetDatabaseType(shapeOnly)
        numHistBins = 200
        if shapeOnly:
            numHistBins = 100
        self.tracer = OEDBTracer(numHistBins)
        self.options = OEShapeDatabaseOptions()
        self.options.SetTracer(self.tracer)
        self.options.SetLimit(nhits)
        self.options.SetScoreType(self.scoretype)
        self.lock = Lock()
        self.errorLevel = errorLevel

    def run(self):
        """ Perform the query """
        # make sure the error level is set for this operating system thread
        OEThrow.SetLevel(self.errorLevel)
        try:
            results = self.shapedb.GetBestOverlays(self.querymolstr,
                                                   self.options,
                                                   self.iformat,
                                                   self.oformat)

            # since we are writing to the thread's dictionary this could
            # race with the GetStatus method below
            self.lock.acquire()
            try:
                self.results = results
                if not results:
                    self.exception = RuntimeError("Query error, no results to return, check the server log for more information")
            finally:
                self.lock.release()

        except Exception as e:
            self.lock.acquire()
            try:
                self.exception = e
            finally:
                self.lock.release()


    def GetStatus(self, blocking):
        """ Returns a tuple of (count, total). count is the number of
        conformers already searched. total is the total number of
        conformers that will be searched.

        If blocking is True this method will not return until the
        count has been changed (beware of deadlocks!). If blocking is
        False the function will return immediately.
        """
        self.lock.acquire()
        try:
            if hasattr(self, "exception"):
                raise self.exception

            return self.tracer.GetCounts(blocking), self.tracer.GetTotal()
        finally:
            self.lock.release()

    def GetHistogram(self):
        """ Returns a list of integers representing the histogram of
        the molecule scores already scored. 
        """
        self.lock.acquire()
        try:
            if hasattr(self, "exception"):
                raise self.exception

            hist = self.tracer.GetHistogram()
            scoretype = self.scoretype
        finally:
            self.lock.release()

        frequencies = OEUIntVector()
        hist.GetHistogram(frequencies, scoretype)
        return list(frequencies)

    def GetResults(self):
        """ Return an OEB string containing the overlaid
        confomers. This method should only be called after this thread
        has been joined. """

        if hasattr(self, "exception"):
            raise self.exception

        return self.results

class ShapeQueryThreadPool:
    """
    Maintains a pool of threads querying the same MCMolShapeDatabase.
    """
    def __init__(self, dbase):
        """ Create a new thread pool to issues queries to dbase """
        self.shapedb = dbase
        self.queryidx = 0
        self.threads = {}
        self.lock = Lock()
        self.errorLevel = OEThrow.GetLevel()

    def SubmitQuery(self, querymolstr, nhits, iformat, oformat, shapeOnly):
        """ Returns an index that can be passed to the QueryStatus and
        QueryResults methods.

        See MCMolShapeDatabase.GetBestOverlays for a description of
        the querymolstr and nhits arguments.
        """
        self.lock.acquire()
        try:
            idx = self.queryidx
            self.queryidx += 1
            self.threads[idx] = ShapeQueryThread(self.shapedb,
                                                 querymolstr,
                                                 nhits,
                                                 iformat,
                                                 oformat,
                                                 shapeOnly,
                                                 self.errorLevel)
            self.threads[idx].start()
        finally:
            self.lock.release()

        return idx

    def QueryStatus(self, idx, blocking):
        """ Returns the status of the query indicated by idx. See
        ShapeQueryThread.GetStatus for the description of the blocking
        argument. """
        self.lock.acquire()
        try:
            thrd = self.threads[idx]
        finally:
            self.lock.release()

        return thrd.GetStatus(blocking)

    def QueryHistogram(self, idx):
        """ Returns the histogram of molecule scores already scored
        for the query indicated by idx. """
        self.lock.acquire()
        try:
            thrd = self.threads[idx]
        finally:
            self.lock.release()

        return thrd.GetHistogram()

    def QueryResults(self, idx):
        """ Wait for the query associated with idx to complete and
        then return the results as an OEB string. """
        self.lock.acquire()
        try:
            thrd = self.threads[idx]
            del self.threads[idx]
        finally:
            self.lock.release()

        thrd.join()
        return thrd.GetResults()

    def SetLevel(self, level):
        """ Set what level of information should be printed by the server. """
        self.errorLevel = level
        return True


class DatabaseLoaderThread(Thread):
    """ A thread to read a database into memory. Special note, OEChem
    must be placed in system allocation mode using
    OESetMemPoolMode(OEMemPoolMode_System). This is because the
    default OEChem memory caching scheme uses thread local storage,
    but since this thread is temporary only for reading in molecules
    that memory will be deallocated when this thread is terminated."""
    def __init__(self, shapedb, moldb, dbname, loadedEvent):
        """
        shapedb - the shapedb to add the molecules to
        moldb   - the OEMolDatabase object to use
        dbname  - the file name to open the OEMolDatabase on
        loadedEvent - event to set once loading is finished
        """
        Thread.__init__(self)
        self.shapedb  = shapedb
        self.moldb    = moldb
        self.dbname   = dbname
        self.loadedEvent = loadedEvent

    def run(self):
        """ Open the database file and load it into the OEShapeDatabase """
        timer = OEWallTimer()
        sys.stderr.write("Opening database file %s ...\n" % self.dbname)
        if not self.moldb.Open(self.dbname):
            OEThrow.Fatal("Unable to open '%s'" % self.dbname)

        dots  = OEThreadedDots(10000, 200, "conformers")
        if not self.shapedb.Open(self.moldb, dots):
            OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % self.dbname)

        dots.Total()
        sys.stderr.write("%s seconds to load database\n" % timer.Elapsed())
        self.loadedEvent.set()

def SetupStream(strm, format):
    format = format.strip('.')
    ftype = OEGetFileType(format)
    if ftype == OEFormat_UNDEFINED:
        raise ValueError("Unsupported file format sent to server '%s'" % format)
    strm.SetFormat(ftype)
    strm.Setgz(OEIsGZip(format))
    return strm

OECOLOR_FORCEFIELDS = {
    "ImplicitMillsDean" : OEColorFFType_ImplicitMillsDean,
    "ImplicitMillsDeanNoRings" : OEColorFFType_ImplicitMillsDeanNoRings,
    "ExplicitMillsDean" : OEColorFFType_ExplicitMillsDean,
    "ExplicitMillsDeanNoRings" : OEColorFFType_ExplicitMillsDeanNoRings
    }

def GetDatabaseType(shapeOnly):
    if shapeOnly:
        return OEShapeDatabaseType_Shape
    return OEShapeDatabaseType_Default

def GetShapeDatabaseArgs(itf):
    shapeOnly = itf.GetBool("-shapeOnly")
    if shapeOnly and itf.GetParameter("-chemff").GetHasValue():
        OEThrow.Fatal("Unable to specify -shapeOnly and -chemff at the same time!")

    chemff = itf.GetString("-chemff")
    if not chemff.endswith(".cff"):
        return (GetDatabaseType(shapeOnly), OECOLOR_FORCEFIELDS[chemff])

    # given a .cff file, use that to construct a OEColorForceField
    assert not shapeOnly
    cff = OEColorForceField()
    if not cff.Init(chemff):
        OEThrow.Fatal("Unable to read color force field from '%s'" % chemff)
        
    return (cff,)
    

def ReadShapeQuery(querymolstr):
    iss = oeisstream(querymolstr)
    query = OEShapeQueryPublic()

    if not OEReadShapeQuery(iss, query):
        raise ValueError("Unable to read a shape query from the data string")

    return query

class MCMolShapeDatabase:
    """ Maintains a database of MCMols that can be queried by shape
    similarity."""
    def __init__(self, itf):
        """ Create a MCMolShapeDatabase from the parameters specified by the OEInterface. """
        self.rwlock  = ReadWriteLock()
        self.loadedEvent = Event()

        self.dbname  = itf.GetString("-dbase")
        self.moldb   = OEMolDatabase()

        self.dbtype = GetDatabaseType(itf.GetBool("-shapeOnly"))
        self.shapedb = OEShapeDatabase(*GetShapeDatabaseArgs(itf))

        # this thread is daemonic so a KeyboardInterupt
        # during the load will cancel the process
        self.loaderThread = DatabaseLoaderThread(self.shapedb,
                                                 self.moldb,
                                                 self.dbname,
                                                 self.loadedEvent)
        self.loaderThread.setDaemon(True)
        self.loaderThread.start()

    def IsLoaded(self, blocking=False):
        """ Return whether the server has finished loading. """
        if blocking:
            self.loadedEvent.wait()

        # clean up the load waiter thread if it's still there
        if self.loadedEvent.isSet() and self.loaderThread is not None:
            self.rwlock.AcquireWriteLock()
            try: # typical double checked locking
                if self.loaderThread is not None:
                    self.loaderThread.join()
                    self.loaderThread = None
            finally:
                self.rwlock.ReleaseWriteLock()

        return self.loadedEvent.isSet()

    def GetBestOverlays(self, querymolstr, options, iformat, oformat):
        """ Return a string of the format specified by 'oformat'
        containing nhits overlaid confomers using querymolstr as the
        query interpretted as iformat.

        querymolstr - a string containing a molecule to use as the query
        options - an instance of OEShapeDatabaseOptions
        iformat - a string representing the file extension to parse the querymolstr as.
                  Note: old clients could be passing .sq files, so
                  iformat == '.oeb' will try to interpret the file as
                  a .sq file.
        oformat - file format to write the results as
        """
        timer = OEWallTimer()

        # make sure to wait for the load to finish
        blocking = True
        loaded = self.IsLoaded(blocking)
        assert loaded

        if iformat.startswith(".sq"):
            query = ReadShapeQuery(querymolstr)
        else:
            # read in query
            qfs = oemolistream()
            qfs = SetupStream(qfs, iformat)
            if not qfs.openstring(querymolstr):
                raise ValueError("Unable to open input molecule string")

            query = OEGraphMol()
            if not OEReadMolecule(qfs, query):
                if iformat == ".oeb": # could be an old client trying to send a .sq file.
                    query = ReadShapeQuery(querymolstr)
                else:
                    raise ValueError("Unable to read a molecule from the string of format '%s'" % iformat)

        ofs = oemolostream()
        ofs = SetupStream(ofs, oformat)
        if not ofs.openstring():
            raise ValueError("Unable to openstring for output")

        # do we only want shape based results?

        # this is a "Write" lock to be paranoid and not overload the GPU
        self.rwlock.AcquireWriteLock()
        try:
            # do search
            scores = self.shapedb.GetSortedScores(query, options)
            sys.stderr.write("%f seconds to do search\n" % timer.Elapsed())
        finally:
            self.rwlock.ReleaseWriteLock()

        timer.Start()
        # write results
        for score in scores:
            mcmol = OEMol()
            if not self.moldb.GetMolecule(mcmol, score.GetMolIdx()):
                OEThrow.Warning("Can't retrieve molecule %i from the OEMolDatabase, skipping..." % score.GetMolIdx())
                continue
            # remove hydrogens to make output smaller, this also
            # ensures OEPrepareFastROCSMol will have the same output
            OESuppressHydrogens(mcmol)

            mol = OEGraphMol(mcmol.GetConf(OEHasConfIdx(score.GetConfIdx())))
            OECopySDData(mol, mcmol)

            OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())

            score.Transform(mol)

            OEWriteMolecule(ofs, mol)

        output = ofs.GetString()
        sys.stderr.write("%f seconds to write hitlist\n" % timer.Elapsed())
        sys.stderr.flush()
        ofs.close()

        return output

    def GetName(self):
        self.rwlock.AcquireReadLock()
        try:
            return self.dbname
        finally:
            self.rwlock.ReleaseReadLock()

    def SetName(self, name):
        self.rwlock.AcquireWriteLock()
        try:
            self.dbname = name
        finally:
            self.rwlock.ReleaseWriteLock()

class ShapeQueryServer:
    """ This object's methods are exposed via XMLRPC. """
    def __init__(self, itf):
        """ Initialize the server to serve queries on the database
        named by dbname."""
        self.shapedb = MCMolShapeDatabase(itf)
        self.thdpool = ShapeQueryThreadPool(self.shapedb)
        self.itf     = itf

    def IsLoaded(self, blocking=False):
        """ Return whether the server has finished loading. """
        return self.shapedb.IsLoaded(blocking)

    def GetBestOverlays(self, querymolstr, nhits, iformat=".oeb", oformat=".oeb"):
        """ A blocking call that only returns once the query is completed. """
        results = self.shapedb.GetBestOverlays(querymolstr.data, nhits, iformat, oformat)
        return Binary(results)

    def SubmitQuery(self, querymolstr, nhits, iformat=".oeb", oformat=".oeb", shapeOnly=False):
        """ Returns a index that can be used by QueryStatus and
        QueryResults. This method will return immediately."""
        if self.itf.GetBool("-shapeOnly"):
            shapeOnly = True

        return self.thdpool.SubmitQuery(querymolstr.data, nhits, iformat, oformat, shapeOnly)

    def QueryStatus(self, queryidx, blocking=False):
        """ Return the status of the query specified by queryidx. See
        ShapeQueryThread.GetStatus for a description of the blocking
        argument and the return value."""
        return self.thdpool.QueryStatus(queryidx, blocking)

    def QueryHistogram(self, queryidx):
        """ Return the current histogram of scores specified by
        queryidx."""
        return self.thdpool.QueryHistogram(queryidx)

    def QueryResults(self, queryidx):
        """ Wait for the query associated with idx to complete and
        then return the results as an OEB string. """
        results = self.thdpool.QueryResults(queryidx)
        return Binary(results)

    def GetVersion(self):
        """ Returns what version of FastROCS this server is. """
        return OEFastROCSGetRelease()

    def OEThrowSetLevel(self, level):
        """ Set what level of information should be printed by the server. """
        return self.thdpool.SetLevel(level)

    def GetName(self):
        """ The name of this database. By default this is the file name of the database used. """
        return self.shapedb.GetName()

    def SetName(self, name):
        """ Set a custom database name for this server. """
        self.shapedb.SetName(name)
        return True

# Restrict to a particular path.
class RequestHandler(SimpleXMLRPCRequestHandler):
    rpc_paths = ('/RPC2',)

class AsyncXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer):
    # if a shutdown request occurs through a signal force everything to terminate immediately
    daemon_threads = True
    allow_reuse_address = True

InterfaceData = """\
!BRIEF [-shapeOnly | -chemff <color forcefield>] [-hostname] [-dbase] database [[-port] 8080]
!PARAMETER -dbase
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to serve
  !KEYLESS 1
!END
!PARAMETER -port
  !TYPE int
  !REQUIRED false
  !BRIEF Port number to start the XML RPC server on
  !DEFAULT 8080
  !KEYLESS 2
!END
!PARAMETER -hostname
  !TYPE string
  !DEFAULT 0.0.0.0
  !BRIEF Name of the server to bind to
!END
!PARAMETER -shapeOnly
  !ALIAS -s
  !TYPE bool
  !DEFAULT false
  !BRIEF Run FastROCS server in shape only mode, clients can also control this separately
!END
!PARAMETER -chemff
  !TYPE string
  !LEGAL_VALUE ImplicitMillsDean
  !LEGAL_VALUE ImplicitMillsDeanNoRings
  !LEGAL_VALUE ExplicitMillsDean
  !LEGAL_VALUE ExplicitMillsDeanNoRings
  !LEGAL_VALUE *.cff
  !DEFAULT ImplicitMillsDean
  !BRIEF Chemical force field. Either a constant or a filename. 
!END
"""

def main(argv=[__name__]):

    if not OEFastROCSIsGPUReady():
        OEThrow.Fatal("No supported GPU available to run FastROCS TK!")

    itf = OEInterface(InterfaceData, argv)

    # default hostname to bind is 0.0.0.0, to allow connections with
    # any hostname
    hostname = itf.GetString("-hostname")

    # default port number is 8080
    portnumber = itf.GetInt("-port")

    # create server
    server = AsyncXMLRPCServer((hostname, portnumber),
                                requestHandler=RequestHandler)
    hostname, portnumber = server.socket.getsockname()
    if hostname == "0.0.0.0":
        hostname = socket.gethostname()
    sys.stderr.write("Listening for ShapeDatabaseClient.py requests on %s:%i\n\n" % (hostname, portnumber))
    sys.stderr.write("Example: ShapeDatabaseClient.py %s:%i query.sdf hit.sdf\n\n" % (hostname, portnumber))

    # register the XMLRPC methods
    server.register_introspection_functions()

    server.register_instance(ShapeQueryServer(itf))

    try:
        # Run the server's main loop
        server.serve_forever()
    finally:
        server.server_close()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

ShapeDatabasePrep

#!/usr/bin/env python
#############################################################################
#  Copyright (C) 2015-2016 OpenEye Scientific Software, Inc.
#############################################################################
# Cache as much as possible on the molecule to improve the performance
# of starting a server from scratch. Also cull to desired number of
# conformers if requested.

import os, sys

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
from openeye.oeshape import *
from openeye.oefastrocs import *

def TrimConformers(mol, maxConfs):
    for i, conf in enumerate(mol.GetConfs()):
        if i >= maxConfs:
            mol.DeleteConf(conf)

def main(argv=[__name__]):
    if len(argv) != 3 and len(argv) != 4:
        OEThrow.Usage("%s <database.oeb> <prepped_database.oeb> [max_confs]" % argv[0])

    maxConfs = None
    if len(argv) == 4:
        maxConfs = int(argv[3])
        if maxConfs < 1:
            OEThrow.Fatal("Illegal number of conformer requested %u", maxConfs)

    # input - preserve rotor-offset-compression
    ifs = oemolistream()
    OEPreserveRotCompress(ifs)

    if not ifs.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # output - use PRE-compress for smaller files (no need to .gz the file)
    ofs = oemolostream()
    OEPRECompress(ofs)
    if not ofs.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    dots = OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        if maxConfs is not None:
            TrimConformers(mol, maxConfs)

        OEPrepareFastROCSMol(mol)
        OEWriteMolecule(ofs, mol)
        dots.Update()
        
    dots.Total()
    ofs.close()

    OEThrow.Info("Indexing %s" % argv[2])
    if not OECreateMolDatabaseIdx(argv[2]):
        OEThrow.Fatal("Failed to index %s" % argv[2])
    
    return 0

if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

ShapeDatabaseClient

#!/usr/bin/env python
from __future__ import print_function
import sys, os
try:
    from xmlrpclib import ServerProxy, Binary, Fault
except ImportError: # python 3
    from xmlrpc.client import ServerProxy, Binary, Fault

def GetFormatExtension(fname):
    base, ext = os.path.splitext(fname.lower())
    if ext == ".gz":
        base, ext = os.path.splitext(base)
        ext += ".gz"
    return ext

def main(argv=[__name__]):
    if not (4 <= len(argv) <= 5):
        sys.stderr.write("%s <server:port> <query> <results> [num_hits = 100]\n" % argv[0])
        return 1

    numHits = 100
    if len(argv) == 5:
        numHits = int(argv[4])

    qfname = argv[2]
    try:
        fh = open(qfname, 'rb')
    except IOError:
        sys.stderr.write("Unable to open '%s' for reading" % qfname)
        return 1

    iformat = GetFormatExtension(qfname)

    ofname = argv[3]
    oformat = GetFormatExtension(ofname)

    s = ServerProxy("http://" + argv[1])
    data = Binary(fh.read())

    try:
        idx = s.SubmitQuery(data, numHits, iformat, oformat)
    except Fault as e:
        if "TypeError" in e.faultString:
            # we're trying to run against an older server, may be able
            # to still work if the formats ameniable.            
            if ((iformat == ".oeb" or
                 iformat == ".sq")
                and oformat == ".oeb"):
                idx = s.SubmitQuery(data, numHits)
            else:
                sys.stderr.write("%s is too new of a version to work with the server %s\n" % (argv[0], argv[1]))
                sys.stderr.write("Please upgrade your server to FastROCS version 1.4.0 or later to be able to use this client\n")
                sys.stderr.write("This client will work with this version of the server if the input file is either '.oeb' or '.sq' and the output file is '.oeb'\n")
                return 1
        else:
            sys.stderr.write(str(e))
            return 1

    while True:
        blocking = True
        try:
            current, total = s.QueryStatus(idx, blocking)
        except Fault as e:
            print(str(e), file=sys.stderr)
            return 1
        
        if total == 0:
            continue

        print("%i/%i" % (current, total))
        
        if total <= current:
            break
    
    results = s.QueryResults(idx)

    # assuming the results come back as a string in the requested format
    with open(ofname, 'wb') as output:
        output.write(results.data)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
                            

See also

ShapeDatabaseClientHistogram

#!/usr/bin/env python
from __future__ import print_function
from __future__ import unicode_literals
import sys, os
try:
    from xmlrpclib import ServerProxy, Binary, Fault
except ImportError: # python 3
    from xmlrpc.client import ServerProxy, Binary, Fault

class Pyasciigraph:
    """ Copied from https://pypi.python.org/pypi/ascii_graph/0.2.1 """
    def __init__(self, line_length=79, 
            min_graph_length=50, 
            separator_length=2
            ):
        """Constructor of Pyasciigraph
        
        :param int line_length: the max number of char on a line
                if any line cannot be shorter, 
                it will go over this limit
        :param int min_graph_length: the min number of char used by the graph
        :param int separator_length: the length of field separator
        """
        self.line_length = line_length
        self.separator_length = separator_length
        self.min_graph_length = min_graph_length

    def _u(self, x):
        if sys.version < '3':
            import codecs
            return codecs.unicode_escape_decode(x)[0]
        else:
            return x

    def _get_maximum(self, data):
        all_max = {}
        all_max['value_max_length'] = 0
        all_max['info_max_length'] = 0
        all_max['max_value'] = 0

        for (info, value) in data:
            if value > all_max['max_value']:
                all_max['max_value'] = value

            if len(info) > all_max['info_max_length']:
                all_max['info_max_length'] = len(info)
            
            if len(str(value)) > all_max['value_max_length']:
                all_max['value_max_length'] = len(str(value))
        return all_max

    def _gen_graph_string(self, value, max_value, graph_length, start_value):
        number_of_square = 0
        if max_value:
            number_of_square = int(value * graph_length / max_value)
        number_of_space = int(start_value - number_of_square)
        return '#' * number_of_square + self._u(' ') * number_of_space

    def _gen_info_string(self, info, start_info, line_length):
        number_of_space = (line_length - start_info - len(info))
        return info + self._u(' ') * number_of_space

    def _gen_value_string(self, value, start_value, start_info):
        number_space = start_info -\
                start_value -\
                len(str(value)) -\
                self.separator_length

        return  ' ' * number_space +\
                str(value) +\
                ' ' * self.separator_length

    def _sanitize_string(self, string):
        #get the type of a unicode string
        unicode_type = type(self._u('t'))
        input_type = type(string)
        if input_type is str:
            if sys.version < '3':
                info = unicode(string)
            else: 
                info = string
        elif input_type is unicode_type:
            info = string
        elif input_type is int or input_type is float:
            if sys.version < '3':
                info = unicode(string)
            else:
                info = str(string)
        return info

    def _sanitize_data(self, data):
        ret = []
        for item in data:
            ret.append((self._sanitize_string(item[0]), item[1]))
        return ret

    def graph(self, label, data, sort=0, with_value=True):
        """function generating the graph
        
        :param string label: the label of the graph
        :param iterable data: the data (list of tuple (info, value))
                info must be "castable" to a unicode string
                value must be an int or a float
        :param int sort: flag sorted
                0: not sorted (same order as given) (default)
                1: increasing order
                2: decreasing order
        :param boolean with_value: flag printing value
                True: print the numeric value (default)
                False: don't print the numeric value
        :rtype: a list of strings (each lines)

        """
        result = []
        san_data = self._sanitize_data(data)
        san_label = self._sanitize_string(label)

        if sort == 1:
            san_data = sorted(san_data, key=lambda value: value[1], reverse=False)
        elif sort == 2:
            san_data = sorted(san_data, key=lambda value: value[1], reverse=True)

        all_max = self._get_maximum(san_data)
        
        real_line_length = max(self.line_length, len(label))
        
        min_line_length = self.min_graph_length +\
                2 * self.separator_length +\
                all_max['value_max_length'] +\
                all_max['info_max_length']

        if min_line_length < real_line_length:
            #calcul of where to start info
            start_info = self.line_length -\
                    all_max['info_max_length']
            #calcul of where to start value
            start_value = start_info -\
                    self.separator_length -\
                    all_max['value_max_length']
            #calcul of where to end graph
            graph_length = start_value -\
                    self.separator_length
        else:
            #calcul of where to start value
            start_value = self.min_graph_length +\
                    self.separator_length
            #calcul of where to start info
            start_info = start_value +\
                    all_max['value_max_length'] +\
                    self.separator_length
            #calcul of where to end graph
            graph_length = self.min_graph_length
            #calcul of the real line length
            real_line_length = min_line_length

        result.append(san_label)
        result.append(self._u('#')* real_line_length)
        

        for item in san_data:
            info = item[0]
            value = item[1]

            graph_string = self._gen_graph_string(
                    value, 
                    all_max['max_value'], 
                    graph_length,
                    start_value
                    )

            value_string = self._gen_value_string(
                    value,
                    start_value,
                    start_info
                    )

            info_string = self._gen_info_string(
                    info,
                    start_info,
                    real_line_length
                    )
            new_line = graph_string + value_string + info_string
            result.append(new_line)

        return result

def AddBin(bins, binSize, binIdx, curTotal):
    lowerBound = binSize * binIdx 
    label = "%.2f" % lowerBound
    bins.append((label, curTotal))
   

def PrintHistogram(hist):
    squashFactor = 10
    binSize = 2.0/(len(hist) / squashFactor)

    bins = []
    curTotal = 0
    binIdx = 0
    for i, val in enumerate(hist):
        if i != 0 and (i % squashFactor) == 0:
            AddBin(bins, binSize, binIdx, curTotal)
            curTotal = 0
            binIdx += 1

        curTotal += val
    AddBin(bins, binSize, binIdx, curTotal)

    graph = Pyasciigraph()
    for line in graph.graph("FastROCS Tanimoto Combo Score Distribution", bins):
        print(line)


def GetFormatExtension(fname):
    base, ext = os.path.splitext(fname.lower())
    if ext == ".gz":
        base, ext = os.path.splitext(base)
        ext += ".gz"
    return ext

def main(argv=[__name__]):
    if not (4 <= len(argv) <= 5):
        sys.stderr.write("%s <server:port> <query> <results> [num_hits = 100]\n" % argv[0])
        return 1

    numHits = 100
    if len(argv) == 5:
        numHits = int(argv[4])

    qfname = argv[2]
    try:
        fh = open(qfname, 'rb')
    except IOError:
        sys.stderr.write("Unable to open '%s' for reading" % qfname)
        return 1

    iformat = GetFormatExtension(qfname)

    ofname = argv[3]
    oformat = GetFormatExtension(ofname)

    s = ServerProxy("http://" + argv[1])
    data = Binary(fh.read())

    try:
        idx = s.SubmitQuery(data, numHits, iformat, oformat)
    except Fault as e:
        sys.stderr.write(str(e))
        return 1

    while True:
        blocking = True
        try:
            current, total = s.QueryStatus(idx, blocking)
            hist = s.QueryHistogram(idx)
        except Fault as e:
            print(str(e), file=sys.stderr)
            return 1
        
        if total == 0:
            continue

        PrintHistogram(hist)
        
        if total <= current:
            break
    
    results = s.QueryResults(idx)

    # assuming the results come back as a string in the requested format
    with open(ofname, 'wb') as output:
        output.write(results.data)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
                            

See also

ShapeDatabaseIsLoaded

#!/usr/bin/env python
from __future__ import print_function
import sys, os
try:
    from xmlrpclib import ServerProxy
except ImportError: # python 3
    from xmlrpc.client import ServerProxy

import socket
from time import sleep

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *

InterfaceData = """\
!BRIEF [-blocking] [-h] <server:port>
!PARAMETER -host
  !ALIAS -h
  !TYPE string
  !REQUIRED true
  !BRIEF The host to check to see if it is up yet
  !KEYLESS 1
!END
!PARAMETER -blocking
  !ALIAS -b
  !TYPE bool
  !DEFAULT false
  !BRIEF If true the program will not exit until the database has finished loading.
!END
!PARAMETER -retries
  !ALIAS -r
  !TYPE int
  !DEFAULT 10
  !BRIEF Number of times to try connecting to the server.
!END
!PARAMETER -timeout
  !ALIAS -t
  !TYPE float
  !DEFAULT 60.0
  !BRIEF The time between retries is the timeout divided by the number of retries.
!END
"""

def main(argv=[__name__]):
    itf = OEInterface(InterfaceData, argv)

    host = itf.GetString("-host")
    s = ServerProxy("http://" + host)

    blocking = itf.GetBool("-blocking")
    retries  = itf.GetInt("-retries")
    if retries < 1:
        OEThrow.Fatal("-retries must be greater than 0")
    timeout  = itf.GetFloat("-timeout")
    if timeout <= 0.0:
        OEThrow.Fatal("-timeout must be greater than 0.0")
    waittime = timeout/retries
    loaded = False
    while retries:
        try:
            loaded = s.IsLoaded(blocking)
            break
        except socket.error:
            retries -= 1
            if retries:
                print("Unable to connect to %s, retrying in %2.1f seconds" % (host, waittime))
                sleep(waittime)

    if not retries:
        print("Was never able to connect to a server, exiting...")
        return -1

    if loaded:
        loaded = "True"
    else:
        loaded = "False"

    print(host, "IsLoaded =", loaded)

    if loaded:
        return 0

    return 1

if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

ShapeDatabaseChunker

#!/usr/bin/env python
#############################################################################
#  Copyright (C) 2010-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Split a multi-conformer database into N chunks keeping molecules
# with the same number of atoms in each chunk. Also caches other
# useful information onto the molecule to improve database load time.

import os, sys

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
from openeye.oeshape import *
from openeye.oefastrocs import *

def main(argv=[__name__]):
    if len(argv) != 4:
        OEThrow.Usage("%s <database> <prefix> <n_servers>" % argv[0])

    # input - preserve rotor-offset-compression
    ifs = oemolistream()
    OEPreserveRotCompress(ifs)

    ifname = argv[1]
    if not ifs.open(ifname):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # output
    prefix  = argv[2]
    ext     = OEGetFileExtension(prefix)
    extstrt = len(prefix)
    if ext:
        extstrt = -(len(ext) + 1)
    else:
        ext = OEGetFileExtension(ifname)
    base = prefix[:extstrt]
    fmt  = base + "_%i." + ext
    
    nservers = int(argv[3])
    outstrms = []
    for i in range(1, nservers + 1):
        ofs = oemolostream()
        if not ofs.open(fmt % i):
            OEThrow.Fatal("Unable to open %s for writing" % argv[2])

        outstrms.append(ofs)

    dots = OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        OEPrepareFastROCSMol(mol)

        nhvyatoms = OECount(mol, OEIsHeavy())

        ofs = outstrms[nhvyatoms % nservers]
        OEWriteMolecule(ofs, mol)

        dots.Update()
        
    dots.Total()

    for strm in outstrms:
        fname = strm.GetFileName()
        strm.close()
        OEThrow.Info("Indexing %s" % fname)
        if not OECreateMolDatabaseIdx(fname):
            OEThrow.Fatal("Failed to index %s" % fname)

    
    return 0

if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

ShapeDatabaseProxy

#!/usr/bin/env python
import sys, os

try:
    from xmlrpclib import ServerProxy, Binary
    from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
except ImportError: # python 3
    from xmlrpc.client import ServerProxy, Binary
    from xmlrpc.server import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
    
from threading import Thread
from threading import Lock
from ShapeDatabaseServer import SetupStream

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *

class ShapeServer:
    """ Encapsulates a single ShapeDatabase running on a remote
    server."""

    def __init__(self, servername, querydata, nhits, iformat, oformat):
        """ Create a ShapeServer specified by servername and submit
        the querydata query for nhits. """
        self.server = ServerProxy("http://" + servername)
        self.queryidx = self.server.SubmitQuery(querydata, nhits, iformat, oformat)

    def QueryStatus(self, blocking):
        """ Return the status of this server. """
        current, total = self.server.QueryStatus(self.queryidx, blocking)

        # only return once the tracer on the server has been initialized
        while total == 0:
            blocking = True
            current, total = self.server.QueryStatus(self.queryidx, blocking)

        return current, total

    def QueryHistogram(self):
        """ Return the histogram from this server. """
        return self.server.QueryHistogram(self.queryidx)

    def QueryResults(self):
        """ Return the results of this server. """
        return self.server.QueryResults(self.queryidx)

class ShapeServerPool:
    """ Abstract a collection of ShapeServer to appear as a single
    server."""
    
    def __init__(self, servernames, querymolstr, nhits, iformat, oformat):
        """ Create a collection of ShapeServers as specified by
        servernames. Launching querymolstr on each for nhits."""
        self.nhits = nhits
        self.oformat = oformat
        
        thrdpool = LaunchFunctionThreadPool(ShapeServer)

        for sname in servernames:
            thrdpool.AddThread(sname, querymolstr, nhits, iformat, oformat)

        self.shapeservers = []
        for server in thrdpool.GetResults():
            self.shapeservers.append(server)

    def QueryStatus(self, blocking):
        """ Return the status of these servers. """
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryStatus)

        for server in self.shapeservers:
            thrdpool.AddThread(server, blocking)

        current = 0
        total = 0
        for scur, stot in thrdpool.GetResults():
            sys.stderr.write("%i/%i" % (scur, stot))
            current += scur
            total   += stot

        return current, total

    def QueryHistogram(self):
        """ Return the total histogram across all servers. """
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryHistogram)

        for server in self.shapeservers:
            thrdpool.AddThread(server)

        totalHist = None
        for hist in thrdpool.GetResults():
            if totalHist is None:
                totalHist = [0] * len(hist)

            totalHist = [lhs + rhs for lhs, rhs in zip(totalHist, hist)]

        return totalHist

    def QueryResults(self):
        """ Return the best nhits results of these servers. """
        timer = OEWallTimer()
        thrdpool = LaunchFunctionThreadPool(ShapeServer.QueryResults)

        for server in self.shapeservers:
            thrdpool.AddThread(server)

        data = []
        for oebdata in thrdpool.GetResults():
            data.append(oebdata.data)

        sys.stderr.write("%f seconds to get results back" % timer.Elapsed())

        data = b"".join(data)
        if not data:
            sys.stderr.write("Possible query error, no data returned by any of the downstream servers")
            return ""

        timer.Start()
        # read in from OEB strings
        ifs = oemolistream()
        ifs = SetupStream(ifs, self.oformat)
        if not ifs.openstring(data):
            sys.stderr.write("Unable to open OEB string from downstream server")
            return ""

        mols = [OEGraphMol(mol) for mol in ifs.GetOEGraphMols()]

        # sort by shape tanimoto
        def GetTanimotoToCmp(mol):
            if OEHasSDData(mol, "TanimotoCombo"):
                return float(OEGetSDData(mol, "TanimotoCombo"))
            return float(OEGetSDData(mol, "ShapeTanimoto"))

        mols.sort(key=GetTanimotoToCmp)
        mols.reverse()

        # write back out to an OEB string
        ofs = oemolostream()
        ofs = SetupStream(ofs, self.oformat)
        ofs.openstring()

        nhits = self.nhits
        if not nhits:
            nhits = len(mols)

        for mol in mols[:nhits]:
            OEWriteMolecule(ofs, mol)

        sys.stderr.write("%f seconds to collate hitlist" % timer.Elapsed())

        return Binary(ofs.GetString())
                

class LaunchFunctionThread(Thread):
    """ A thread to launch a function and be able to retrieve its
    return value."""
    
    def __init__(self, func, *args):
        Thread.__init__(self)
        self.func = func
        self.args = args

    def run(self):
        try:
            self.result = self.func(*self.args)
        except Exception as e:
            self.exception = e

    def GetResult(self):
        if hasattr(self, "exception"):
            raise self.exception
        return self.result

class LaunchFunctionThreadPool:
    """ Given a function, launch it in several threads with a separate
    argument list for each."""

    def __init__(self, func):
        """ Start a new thread pool to execute the function func. """
        self.func = func
        self.threads = []

    def AddThread(self, *args):
        """ Create and start another thread to run func on args. """
        thrd = LaunchFunctionThread(self.func, *args)
        thrd.start()
        self.threads.append(thrd)

    def GetResults(self):
        """ Returns an iterable of the results of each thread in the
        order they were added with AddThread."""
        for thrd in self.threads:
            thrd.join()
            yield thrd.GetResult()

def ShapeServerIsLoaded(servername, blocking):
    """ Helper function to determine whether a server is in the 'loaded' state. """
    server = ServerProxy("http://" + servername)
    return server.IsLoaded(blocking)

class ShapeServerProxy:
    """ Proxy queries across multiple remote shape servers."""
    
    def __init__(self, servernames):
        """ Create a proxy  """
        self.servernames = servernames
        self.queryidx = 0
        self.activequeries = {}
        self.lock = Lock()

    def IsLoaded(self, blocking=False):
        """ Return whether the servers have finished loading. """
        thrdpool = LaunchFunctionThreadPool(ShapeServerIsLoaded)

        for server in self.servernames:
            thrdpool.AddThread(server, blocking)

        areloaded = True
        for result in thrdpool.GetResults():
            areloaded = areloaded and result
        
        return areloaded

    def SubmitQuery(self, querymolstr, nhits, iformat=".oeb", oformat=".oeb"):
        """ Submit a query to these shape servers. """
        shapeservers = ShapeServerPool(self.servernames, querymolstr, nhits, iformat, oformat)

        self.lock.acquire()
        try:
            idx = self.queryidx
            self.queryidx += 1

            self.activequeries[idx] = shapeservers
        finally:
            self.lock.release()

        return idx
            
    def QueryStatus(self, queryidx, blocking=False):
        """ Return the status of the query specified by queryidx. """
        self.lock.acquire()
        try:
            shapeservers = self.activequeries[queryidx]
        finally:
            self.lock.release()

        return shapeservers.QueryStatus(blocking)

    def QueryHistogram(self, queryidx):
        """ Return the current histogram of scores specified by
        queryidx."""
        self.lock.acquire()
        try:
            shapeservers = self.activequeries[queryidx]
        finally:
            self.lock.release()

        return shapeservers.QueryHistogram()

    def QueryResults(self, queryidx):
        """ Return the results of the query specified by queryidx. """
        self.lock.acquire()
        try:
            shapeservers = self.activequeries.pop(queryidx)
        finally:
            self.lock.release()

        return shapeservers.QueryResults()

# Restrict to a particular path.
class RequestHandler(SimpleXMLRPCRequestHandler):
    rpc_paths = ('/RPC2',)

def main(argv=[__name__]):
    if len(argv) < 2:
        OEThrow.Usage("%s <server 1> <server 2> ... <server n> [portnumber=8080]" % argv[0])

    # default port number is 8080
    portnumber = 8080
    try:
        portnumber = int(argv[-1])
        servernames = argv[1:-1]
    except ValueError:
        servernames = argv[1:]

    # Create server, an empty string is used to allow connections with
    # any hostname
    server = SimpleXMLRPCServer(("", portnumber),
                                requestHandler=RequestHandler)
    server.register_introspection_functions()

    server.register_instance(ShapeServerProxy(servernames))

    try:
        # Run the server's main loop
        server.serve_forever()
    finally:
        server.server_close()

    return 0

if __name__ == '__main__':
    sys.exit(main(sys.argv))
                            

See also

ShapeDatabaseOEThrowSetLevel

#!/usr/bin/env python
from __future__ import print_function
import sys, os

try:
    from xmlrpclib import ServerProxy
except ImportError: # python 3
    from xmlrpc.client import ServerProxy

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *

InterfaceData = """\
!BRIEF (-debug|-verbose|-info|-warning|-error) [-h] <server:port>
!PARAMETER -host
  !ALIAS -h
  !TYPE string
  !REQUIRED true
  !BRIEF The host whose verbosity level will be changed
  !KEYLESS 1
!END
!PARAMETER -debug
  !ALIAS -d
  !TYPE bool
  !DEFAULT false
  !BRIEF Debug error level
!END
!PARAMETER -verbose
  !ALIAS -v
  !TYPE bool
  !DEFAULT false
  !BRIEF Verbose error level
!END
!PARAMETER -info
  !ALIAS -i
  !TYPE bool
  !DEFAULT false
  !BRIEF Info error level
!END
!PARAMETER -warning
  !ALIAS -w
  !TYPE bool
  !DEFAULT false
  !BRIEF Warning error level
!END
!PARAMETER -error
  !ALIAS -e
  !TYPE bool
  !DEFAULT false
  !BRIEF Unrecoverable error level
!END
"""

def main(argv=[__name__]):
    itf = OEInterface(InterfaceData, argv)
    
    levels = {"-debug"   : (OEErrorLevel_Debug,   "OEErrorLevel_Debug"),
              "-verbose" : (OEErrorLevel_Verbose, "OEErrorLevel_Verbose"),
              "-info"    : (OEErrorLevel_Info,    "OEErrorLevel_Info"),
              "-warning" : (OEErrorLevel_Warning, "OEErrorLevel_Warning"),
              "-error"   : (OEErrorLevel_Error,   "OEErrorLevel_Error")}

    onFlags = [key for key in levels if itf.GetBool(key)]
    if not onFlags:
        OEThrow.Fatal("Need specify exactly one error level: " +
                      "|".join(levels.keys()))
    elif len(onFlags) > 1:
        OEThrow.Fatal("This flags are mutually exclusive: " +
                      "|".join(onFlags))

    level, name = levels[onFlags[0]]

    s = ServerProxy("http://" + itf.GetString("-host"))
    if s.OEThrowSetLevel(level):
        print("OEThrow.SetLevel(" + name + ") successful")
    else:
        print("OEThrow.SetLevel(" + name + ") failed")

    return 0

if __name__ == '__main__':
    sys.exit(main(sys.argv))
                            

See also

CustomColorFFPrep

#!/usr/bin/env python
#############################################################################
#  Copyright (C) 2013-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Cache custom color atoms onto a molecule to be used by FastROCS

import os, sys

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
from openeye.oeshape import *
from openeye.oefastrocs import *

COLOR_FORCE_FIELD = """#
TYPE negative
#
#
PATTERN negative [-]
PATTERN negative [OD1+0]-[!#7D3]~[OD1+0]
PATTERN negative [OD1+0]-[!#7D4](~[OD1+0])~[OD1+0]
#
#
INTERACTION negative negative attractive gaussian weight=1.0 radius=1.0
"""

def main(argv=[__name__]):
    if len(argv) != 3:
        OEThrow.Usage("%s <input> <output>" % argv[0])

    # input - preserve rotor-offset-compression
    ifs = oemolistream()
    ihand = ifs.GetBinaryIOHandler()
    ihand.Clear()
    OEInitHandler(ihand, OEBRotCompressOpts(), OEBRotCompressOpts())

    ifname = argv[1]
    if not ifs.open(ifname):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # output
    ofname = argv[2]
    oformt = OEGetFileType(OEGetFileExtension(ofname))
    if oformt != OEFormat_OEB:
        OEThrow.Fatal("Output file format much be OEB")

    ofs = oemolostream()
    if not ofs.open(ofname):
        OEThrow.Fatal("Unable to open %s for writing" % ofname)

    iss = oeisstream(COLOR_FORCE_FIELD)
    cff = OEColorForceField()
    if not cff.Init(iss):
        OEThrow.Fatal("Unable to initialize OEColorForceField")

    dots = OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        OEPrepareFastROCSMol(mol, cff)

        OEWriteMolecule(ofs, mol)

        dots.Update()
        
    dots.Total()
    ofs.close()

    OEThrow.Info("Indexing %s" % ofname)
    if not OECreateMolDatabaseIdx(ofname):
        OEThrow.Fatal("Failed to index %s" % argv[2])

    return 0

if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

BestShapeOverlay

#!/usr/bin/env python
from __future__ import print_function
import sys, os

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
from openeye.oefastrocs import *

def main(argv=[__name__]):
    if len(argv) < 3:
        OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    if not OEFastROCSIsGPUReady():
        OEThrow.Info("No supported GPU available!")
        return 0

    dbname = argv[1]
    # read in database
    ifs = oemolistream()
    if not ifs.open(dbname):
        OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = OEWallTimer()
    dbase = OEShapeDatabase()
    moldb = OEMolDatabase()
    if not moldb.Open(ifs):
        OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots  = OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%s seconds to load database" % timer.Elapsed())

    for qfname in argv[2:]:
        # read in query
        qfs = oemolistream()
        if not qfs.open(qfname):
            OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = OEGraphMol()
        if not OEReadMolecule(qfs, query):
            OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everthing to a similary named file
        ofs = oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            OEThrow.Fatal("Unable to open '%s'" % argv[4])

        print("Searching for", qfname)
        numHits = moldb.NumMols()
        for score in dbase.GetSortedScores(query, numHits):
            dbmol = OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue
            
            mol = OEGraphMol(dbmol.GetConf(OEHasConfIdx(score.GetConfIdx())))

            OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            OEWriteMolecule(ofs, mol)
        print("Wrote results to", ofname)
        
    return 0

        
if __name__ == '__main__':
    sys.exit(main(sys.argv))
                            

See also

Shape Clustering

#!/usr/bin/env python
# Shape clustering
from __future__ import print_function
import sys, os

oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
from openeye.oeshape import * 
from openeye.oefastrocs import *

def GetScoreGetter(shapeOnly=False):
    if shapeOnly:
        return OEShapeDatabaseScore.GetShapeTanimoto
    return OEShapeDatabaseScore.GetTanimotoCombo

class ShapeCluster:
    def __init__(self, dbname, cutoff, shapeOnly):
        self.cutoff = cutoff

        # set up and options and database based upon shapeOnly
        self.defaultOptions = OEShapeDatabaseOptions()
        dbtype = OEShapeDatabaseType_Default
        if shapeOnly:
            dbtype = OEShapeDatabaseType_Shape

        self.defaultOptions.SetScoreType(dbtype)
        self.shapedb = OEShapeDatabase(dbtype)
        self.dbmols = []
        volumes = []

        # read in database
        ifs = oemolistream()
        if not ifs.open(dbname):
            OEThrow.Fatal("Unable to open '%s'" % dbname)

        count = 0
        for mol in ifs.GetOEGraphMols():
            title =  mol.GetTitle()
            if not title:
                title = "Untitled" + str(count)
                mol.SetTitle(title)
                count += 1

            idx = self.shapedb.AddMol(OEMol(mol))
            
            volume = OEGetCachedSelfShape(mol)
            if volume == 0.0:
                volume = OESelfShape(mol)
            volumes.append((volume, idx))
            
            dbmol = OEGraphMol(mol, OEMolBaseType_OEDBMol)
            dbmol.Compress()
            self.dbmols.append(dbmol)

        numMols = len(volumes)
        
        # find the molecule with the median volume as our first query
        volumes.sort()
        medianVolume, medianIdx = volumes[numMols // 2]

        self.nextClusterHeadIdx = medianIdx
        self.remainingMolecules = numMols

        self.tanimotos = [0.0] * numMols

        self.scoreGetter = GetScoreGetter(shapeOnly)

    def HasRemainingMolecules(self):
        return self.remainingMolecules != 0

    def _removeMolecule(self, idx):
        self.remainingMolecules -= 1
        
        assert self.dbmols[idx] is not None
        dbmol = self.dbmols[idx]
        dbmol.UnCompress()
        self.dbmols[idx] = None

        assert self.tanimotos[idx] is not None
        self.tanimotos[idx] = sys.float_info.max

        return dbmol

    def GetNextClusterHead(self):
        assert self.nextClusterHeadIdx is not None
        return self._removeMolecule(self.nextClusterHeadIdx)

    def GetCluster(self, query):
        options = OEShapeDatabaseOptions(self.defaultOptions)

        dots = OEDots(10000, 200, "molecules searched")

        minTani = sys.float_info.max
        minIdx = None
        for score in self.shapedb.GetScores(query, options):
            idx = score.GetMolIdx()
            # check if already in a cluster
            if self.dbmols[idx] is None:
                continue

            if self.cutoff < self.scoreGetter(score):
                yield self._removeMolecule(idx), score
            else:
                self.tanimotos[idx] = max(self.tanimotos[idx], self.scoreGetter(score))

                minTani, minIdx = min((minTani, minIdx), (self.tanimotos[idx], idx))
            dots.Update()
        dots.Total()

        self.nextClusterHeadIdx = minIdx
        

InterfaceData = """\
!BRIEF [-shapeOnly] [-cutoff 0.75] [-dbase] <database> [-clusters] <clusters.oeb>
!PARAMETER -dbase
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to select from
  !KEYLESS 1
!END
!PARAMETER -clusters
  !TYPE string
  !REQUIRED true
  !BRIEF Output to write clusters to
  !KEYLESS 2
!END
!PARAMETER -shapeOnly
  !ALIAS -s
  !TYPE bool
  !DEFAULT false
  !BRIEF Run FastROCS in shape only mode. 
!END
!PARAMETER -cutoff
  !ALIAS -c
  !TYPE float
  !DEFAULT 0.75
  !BRIEF Number of random pairs to sample. 
!END
""" 
            
def main(argv=[__name__]):
    itf = OEInterface(InterfaceData, argv)

    cutoff = itf.GetFloat("-cutoff")

    ofs = oemolostream()
    if not ofs.open(itf.GetString("-clusters")):
        OEThrow.Fatal("Unable to open '%s'" % itf.GetString("-clusters"))

    if ofs.GetFormat() != OEFormat_OEB:
        OEThrow.Fatal("Output file must be OEB")

    sdtag  = "TanimotoComboFromHead"
    if itf.GetBool("-shapeOnly"):
        sdtag  = "ShapeTanimotoFromHead"
    getter = GetScoreGetter(itf.GetBool("-shapeOnly"))

    cluster = ShapeCluster(itf.GetString("-dbase"), cutoff, itf.GetBool("-shapeOnly"))

    # do the clustering
    while cluster.HasRemainingMolecules():
        clusterHead = cluster.GetNextClusterHead()
        print("Searching for neighbors of ", clusterHead.GetTitle())

        for nbrMol, score in cluster.GetCluster(clusterHead):
            OESetSDData(nbrMol, sdtag, "%.4f" % getter(score))

            score.Transform(nbrMol)

            clusterHead.AddData(nbrMol.GetTitle(), nbrMol)

        OEWriteMolecule(ofs, clusterHead)
    
    return 0
        
if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

Shape Distance Matrix

#!/usr/bin/env python
# Write out a csv file of the similarity matrix of a multi-conformer
# database. Note, all conformers will be compared to each other,
# however, only the best match will be reported between two molecules.

import sys, os
import csv

oepy = os.path.join(os.path.dirname(__file__), "openeye", "python")
sys.path.insert(0, os.path.realpath(oepy))

from openeye.oechem import *
from openeye.oefastrocs import *

InterfaceData = """\
!BRIEF [-shapeOnly] [-dbase] <database> [-matrix] <clusters.csv>
!PARAMETER -dbase
  !TYPE string
  !REQUIRED true
  !BRIEF Input database to select from
  !KEYLESS 1
!END
!PARAMETER -matrix
  !TYPE string
  !REQUIRED true
  !BRIEF csv file to write similarity matrix to
  !KEYLESS 2
!END
!PARAMETER -shapeOnly
  !ALIAS -s
  !TYPE bool
  !DEFAULT false
  !BRIEF Run FastROCS in shape only mode. 
!END
""" 
            
def GetScoreGetter(shapeOnly=False):
    if shapeOnly:
        return OEShapeDatabaseScore.GetShapeTanimoto
    return 

def main(argv=[__name__]):
    itf = OEInterface(InterfaceData, argv)

    ifs = oemolistream()
    if not ifs.open(itf.GetString("-dbase")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-dbase"))

    colname = "TanimotoCombo"
    getter  = OEShapeDatabaseScore.GetTanimotoCombo
    dbtype  = OEShapeDatabaseType_Default
    if itf.GetBool("-shapeOnly"):
        colname = "ShapeTanimoto"
        getter  = OEShapeDatabaseScore.GetShapeTanimoto
        dbtype  = OEShapeDatabaseType_Shape

    csvwriter = csv.writer(open(itf.GetString("-matrix"), 'w'))
    csvwriter.writerow(["Title1", "Title2", colname])

    shapedb = OEShapeDatabase(dbtype)
    options = OEShapeDatabaseOptions()
    options.SetScoreType(dbtype)
    
    lmat = [[]]
    titles  = []
    for mol in ifs.GetOEMols():
        if titles:
            bestscores = [0.0] * len(titles)
            for conf in mol.GetConfs():
                for score in shapedb.GetScores(conf, options):
                    midx = score.GetMolIdx()
                    bestscores[midx] = max(bestscores[midx], getter(score))

            lmat.append(bestscores)

        shapedb.AddMol(mol)

        title = mol.GetTitle() 
        if not title:
            title = str(len(titles) + 1)
        titles.append(title)

    #write csv file
    csvwriter = csv.writer(open(itf.GetString("-matrix"), 'w'))
    csvwriter.writerow(titles)
    nrows = len(titles)
    for i in range(nrows):
        row = [i+1]
        for j in range(nrows):
            val = 2.0
            if itf.GetBool("-shapeOnly"):
                val = 1.0
                
            if j > i:
                val -= lmat[j][i]
            elif j < i:
                val -= lmat[i][j]
            elif j == i:
                val = 0.0 

            row.append("%.3f" % val)

        csvwriter.writerow(row)

    return 0
        
if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also