Shape Database Prep
Prepares an OEB file for faster load performance (~10x) into
ShapeDatabaseServer.py. The script will fail if attempting to store
as .oeb.gz, not .oeb. A corresponding .oeb.idx index
file will be created that will also improve load performance.
The disk space lost due to not using gzip compression is offset by
removing unnecessary information from the OEB file as well as the newer
PRE-compressed OEB format.
The maximum number of conformers per molecule can also be reduced at
the same time by specifying the -maxConfs argument. The default
maximum number of conformers is set to 10.
By default, the coordinates are stored in half floating point precision to save space.
To store in full floating point precision run with the --storeFloat flag.
Warning
The PRE-compress OEB format may not be readable by older OpenEye products built on versions of OEChem prior to OEChem 2.0.2 (2014.Oct).
Code
prompt> ShapeDatabasePrep.py [-maxConfs 10] [-storeFloat] [-in] <database.oeb> [-out] <database.oeb>
Download code
#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence)
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.
# Cache as much as possible on the molecule to improve the performance
# of starting a server from scratch. Also cull to desired number of
# conformers if requested.
import os
import sys
from openeye import oechem
from openeye import oefastrocs
oepy = os.path.join(os.path.dirname(__file__), "..", "python")
sys.path.insert(0, os.path.realpath(oepy))
InterfaceData = """\
!BRIEF [-maxConfs 10] [-storeFloat] [-in] <database.oeb> [-out] <database.oeb>
!PARAMETER -in
!TYPE string
!REQUIRED true
!BRIEF Input database to prep
!KEYLESS 1
!END
!PARAMETER -out
!TYPE string
!REQUIRED true
!BRIEF Output prepared database
!KEYLESS 2
!END
!PARAMETER -maxConfs
!ALIAS -mc
!TYPE int
!DEFAULT 10
!REQUIRED false
!BRIEF Maximum conformers per molecule
!END
!PARAMETER -storeFloat
!ALIAS -sf
!TYPE bool
!DEFAULT false
!REQUIRED false
!BRIEF Store as full float precision in output file else store as half float (default)
!END
"""
def TrimConformers(mol, maxConfs):
for i, conf in enumerate(mol.GetConfs()):
if i >= maxConfs:
mol.DeleteConf(conf)
def main(argv=[__name__]):
itf = oechem.OEInterface(InterfaceData, argv)
# input - preserve rotor-offset-compression
ifs = oechem.oemolistream()
oechem.OEPreserveRotCompress(ifs)
if not ifs.open(itf.GetString("-in")):
oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-in"))
# output - use PRE-compress for smaller files (no need to .gz the file)
ofs = oechem.oemolostream()
oechem.OEPRECompress(ofs)
if not ofs.open(itf.GetString("-out")):
oechem.OEThrow.Fatal("Unable to open '%s' for writing" % itf.GetString("-out"))
if itf.GetString("-out").endswith('.gz'):
oechem.OEThrow.Fatal("Output file must not gzipped")
maxConfs = itf.GetInt("-maxConfs")
if maxConfs < 1:
oechem.OEThrow.Fatal("Illegal number of conformer requested %u", maxConfs)
dots = oechem.OEDots(10000, 200, "molecules")
for mol in ifs.GetOEMols():
if maxConfs is not None:
TrimConformers(mol, maxConfs)
oefastrocs.OEPrepareFastROCSMol(mol)
if not itf.GetBool("-storeFloat"):
halfMol = oechem.OEMol(mol, oechem.OEMCMolType_HalfFloatCartesian)
oechem.OEWriteMolecule(ofs, halfMol)
else:
oechem.OEWriteMolecule(ofs, mol)
dots.Update()
dots.Total()
ofs.close()
print("Indexing %s" % itf.GetString("-out"))
if not oechem.OECreateMolDatabaseIdx(itf.GetString("-out")):
oechem.OEThrow.Fatal("Failed to index %s" % itf.GetString("-out"))
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv))