MCS Fragment database generation
This program performs an MCS fragmentation analysis of a set of structures and saves the generated index to a file for subsequent loading and querying.
Schematic representation of the MCS fragment search process
See also
OEMCSFragDatabase class
OEMCSFragDatabaseOptions class
Command Line Interface
prompt> CreateMCSFragDatabase.py index.sdf output.mcsfrag
Code
Download code
#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence)
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.
#############################################################################
# Utility to perform an MCS fragmentation on an input set of structures
# and save the index for subsequent analysis
# ---------------------------------------------------------------------------
# CreateMCSFragDatabase index_mols output_index
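# [ -verbose 1 ] // optional verbosity
# [ -timer 1 ] // optional: report indexing time
# [ -maxrec N ] // optional: index at most N input records
# [ -status N ] // optional: print indexing status every N records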
#
# index_mols: filename of input molecules to analyze
# output_index: filename of MCS fragment index
#############################################################################
from openeye import oechem
from openeye import oemedchem
import sys
def MCSFragIndex(itf):
    """Index the input structures into an MCS fragment database and save it.

    Reads the settings from the parsed command line, adds every input
    molecule to an ``OEMCSFragDatabase`` and writes the resulting index to
    the ``-output`` file.

    Args:
        itf: parsed ``oechem.OEInterface`` providing ``-input``, ``-output``,
            ``-verbose``, ``-timer``, ``-maxrec`` and ``-status``, plus the
            options consumed by ``OESetupMCSFragDatabaseOptions``.

    Returns:
        0 once the index file has been written. Every failure (bad output
        file type, unreadable input, no records, no fragments, write error)
        is reported through ``oechem.OEThrow.Fatal``.
    """
    # output index file
    mcsindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMCSFragDatabaseFileType(mcsindexfile):
        # Adjacent literals instead of a backslash continuation inside the
        # string, so source indentation can never leak into the message.
        oechem.OEThrow.Fatal(
            "Output file is not an mcs fragment index type "
            "- needs .mcsfrag extension: {}".format(mcsindexfile))

    # create options class with defaults
    mcsopt = oemedchem.OEMCSFragDatabaseOptions()
    # set up options from command line
    if not oemedchem.OESetupMCSFragDatabaseOptions(mcsopt, itf):
        oechem.OEThrow.Fatal("Error setting MCS fragment database options")

    # input structures to index
    inputfile = itf.GetString("-input")
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(inputfile):
        oechem.OEThrow.Fatal("Unable to open {} for reading".format(inputfile))

    # get requested verbosity setting; verbose output always includes timing
    verbose = itf.GetBool("-verbose")
    timer = itf.GetBool("-timer") or verbose

    watch = oechem.OEStopwatch()

    # a negative -maxrec is treated like 0, i.e. no record limit
    maxrec = max(itf.GetInt("-maxrec"), 0)
    statusrec = itf.GetInt("-status")

    if verbose:
        if not mcsopt.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info("Using index range={0:.1f}-{1:.1f}%"
                                .format(mcsopt.GetIndexableFragmentRangeMin(),
                                        mcsopt.GetIndexableFragmentRangeMax()))
        if statusrec:
            oechem.OEThrow.Info("Status output after every {0} records".format(statusrec))
        if maxrec:
            oechem.OEThrow.Info("Indexing a maximum of {0} records".format(maxrec))

    # create indexing engine
    mcsdb = oemedchem.OEMCSFragDatabase(mcsopt)

    # add molecules to be indexed
    record = 0
    unindexed = 0
    try:
        for mol in ifsindex.GetOEGraphMols():
            # AddMol is expected to hand back the record id on success; any
            # other value is treated as a status code naming the failure.
            status = mcsdb.AddMol(mol, record)
            if status != record:
                unindexed += 1
                if verbose:
                    oechem.OEThrow.Info('Input structure not added to index, record=%d status=%s' %
                                        (record, oemedchem.OEMatchedPairIndexStatusName(status)))
            record += 1
            if maxrec and record >= maxrec:
                break  # maximum record limit reached
            if statusrec and (record % statusrec) == 0:
                oechem.OEThrow.Info("Records: {} Indexed: {} Unindexed: {}"
                                    .format(record, (record - unindexed), unindexed))
    finally:
        # release the input file as soon as reading is finished
        ifsindex.close()

    indextime = watch.Elapsed()

    if record == 0:
        oechem.OEThrow.Fatal("No records in input structure file for indexing")

    if not mcsdb.NumFragments():
        oechem.OEThrow.Fatal('No fragments found from indexing, ' +
                             'use -fragGe,-fragLe options to extend indexing range')

    if timer:
        if not indextime:
            # No measurable elapsed time: report the counts only and skip the
            # rates (avoids a division by zero). This is an informational
            # summary of a successful run, so it must not be raised as Fatal.
            oechem.OEThrow.Info("Processed {0} molecules, "
                                "generating {1} fragments"
                                .format(record, mcsdb.NumFragments()))
        else:
            oechem.OEThrow.Info("Processed {0} molecules, "
                                "generating {1} fragments in {2:.2F} sec: "
                                "{3:,.1F} mols/sec {4:,.1F} frags/sec"
                                .format(record,
                                        mcsdb.NumFragments(),
                                        indextime,
                                        float(record)/float(indextime),
                                        float(mcsdb.NumFragments())/float(indextime)))

    if not oemedchem.OEWriteMCSFragDatabase(mcsindexfile, mcsdb):
        oechem.OEThrow.Fatal("Error serializing MCS fragment database: {}"
                             .format(mcsindexfile))

    # return some status information
    oechem.OEThrow.Info("Records: {}, Indexed: {}, fragments: {:,d}"
                        .format(record,
                                mcsdb.NumMols(),
                                mcsdb.NumFragments()))
    return 0
############################################################
# Command line definition handed to oechem.OEInterface in main():
# two required keyless parameters (-input, -output) in the I/O category and
# the optional -verbose, -maxrec, -timer and -status settings that
# MCSFragIndex reads. The text is parsed at runtime, so it must stay verbatim.
InterfaceData = """
#createmcsfragdatabase interface file
!CATEGORY CreateMCSFragDatabase
!CATEGORY I/O
!PARAMETER -input 1
!TYPE string
!REQUIRED true
!BRIEF Input filename of structure(s) to index
!KEYLESS 1
!END
!PARAMETER -output 2
!TYPE string
!REQUIRED true
!BRIEF Output filename of MCS fragment serialized index
!KEYLESS 2
!END
!END
!CATEGORY options
!PARAMETER -verbose 1
!TYPE bool
!DEFAULT 0
!BRIEF generate verbose output
!END
!PARAMETER -maxrec 2
!TYPE int
!DEFAULT 0
!BRIEF limit indexing to -maxrec records from the -input structures
!END
!PARAMETER -timer 3
!TYPE bool
!DEFAULT 0
!BRIEF report indexing time
!END
!PARAMETER -status 4
!TYPE int
!DEFAULT 0
!BRIEF print indexing status every -status records
!END
!END
!END
"""
def main(argv=None):
    """Parse the command line and run the MCS fragment indexing.

    Args:
        argv: argument list including the program name; when omitted it
            defaults to ``[__name__]``, i.e. no command line arguments.

    Returns:
        The status returned by ``MCSFragIndex`` (0 on success), suitable for
        passing to ``sys.exit``.
    """
    # None sentinel instead of a mutable list shared between calls
    if argv is None:
        argv = [__name__]

    itf = oechem.OEInterface(InterfaceData)
    # register the MCS fragment database options on the interface
    oemedchem.OEConfigureMCSFragDatabaseOptions(itf)

    if not oechem.OEParseCommandLine(itf, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    # propagate the indexing status instead of silently dropping it
    return MCSFragIndex(itf)
# Script entry point: run main() with the process arguments and use its
# return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))