Bemis Murcko perception

A program that perceives the [Bemis-1996] regions of the input structures and writes the role information and annotations as SD data on the output structures. Uncoloring of the fragment regions, which removes atom types and properties, can optionally be requested for the SMILES string annotations in the output.

See also

Command Line Interface

Usage: ./BemisMurckoPerception input.sdf output.sdf [ -uncolor ] [ -unsatHetero ] [ -smartsSubstituents substituentsString ]
[ -regionType regionTypeString ]

Code

Download code

BemisMurckoPerception.py

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Utility to fragment the input structures by Bemis-Murcko rules
# ---------------------------------------------------------------------------
# BemisMurckoPerception.py [ -uncolor ] [-i] <input_mols> [-o] <output_mols>
# [-unsatHetero] [-r] <region_type> [-sub] <smarts_substituents>
# input_mols: filename of molecules to fragment and uncolor
# output_mols: filename of output structures annotated with SD data of perceived regions
# [ -uncolor ]: optional arg to request uncoloring of output fragment info
# [-unsatHetero]: optional arg to include unsaturated bonds on hetero atoms to framework
# [-r]: optional arg selecting the region type of fragments to include
#       (All, Framework, Ring, Linker, or Sidechain)
# [-sub]: optional arg to include substituents specified by SMARTS pattern on framework
#############################################################################
from openeye import oechem
from openeye import oemedchem
import sys

############################################################
# Command-line interface definition handed to oechem.OEInterface in main().
# -i and -o are required and may be given positionally (KEYLESS 1 and 2);
# the remaining parameters are optional.  The string "None" is the default
# of -smartsSubstituents and is what main() compares against to detect that
# no substituent SMARTS was supplied.
InterfaceData = """
!BRIEF [ -uncolor ] [-i] <infile> [-o] <outfile> [-unsatHetero] [-r] <regionType> [-sub] <smartsSubs>
!PARAMETER -i
  !ALIAS -in
  !ALIAS -input
  !TYPE string
  !REQUIRED true
  !BRIEF Input structure file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !ALIAS -output
  !TYPE string
  !REQUIRED true
  !BRIEF Output SD file name
  !KEYLESS 2
!END
!PARAMETER -uncolor
  !ALIAS -u
  !TYPE bool
  !DEFAULT false
  !BRIEF Uncolor output molecules
!END
!PARAMETER -unsatHetero
  !ALIAS -uhb
  !TYPE bool
  !DEFAULT false
  !BRIEF Include sidechains on main framework if connected by unsaturated bonds to hetero atoms.
!END
!PARAMETER -regionType
  !ALIAS -r
  !TYPE string
  !DEFAULT All
  !BRIEF Region type of fragments to include. Valid inputs are All, Framework, Ring, Linker, and Sidechain.
!END
!PARAMETER -smartsSubstituents
  !ALIAS -sub
  !TYPE string
  !DEFAULT None
  !BRIEF SMARTS string for custom substituents to be included in framework.
!END
"""


def main(argv=None):
    """Annotate input molecules with their perceived Bemis-Murcko regions.

    For every input record the largest component is kept, its Bemis-Murcko
    regions are perceived, and one SD data entry per region role is added to
    the molecule (tag = role name, value = SMILES of the region fragment).
    Molecules with at least one perceived region are written to the output
    file; molecules without any are reported with a warning and skipped.

    Args:
        argv: Command-line argument list (program name first), parsed
            according to ``InterfaceData``.  Defaults to ``[__name__]``.

    Returns:
        0 after the summary line has been printed.

    Errors are reported through ``oechem.OEThrow.Fatal`` for an invalid
    substituent SMARTS, an input or output file that cannot be opened, an
    output format without SD data support, or an input with no records.
    """
    # Avoid a mutable default argument; None stands in for the original
    # default value of [__name__].
    if argv is None:
        argv = [__name__]

    itf = oechem.OEInterface(InterfaceData, argv)

    # flag on command line indicates uncoloring option or not
    bUncolor = itf.GetBool("-uncolor")

    # optional unsaturated hetero bonds flag
    bUnsaturatedHeteroBonds = itf.GetBool("-unsatHetero")

    # optional user-specified SMARTS strings for substituents
    smartsSubstituentsString = itf.GetString("-smartsSubstituents")

    # optional user-specified region type of the fragments to include
    regionTypeString = itf.GetString("-regionType")

    options = oemedchem.OEBemisMurckoOptions()

    if bUnsaturatedHeteroBonds:
        options.SetUnsaturatedHeteroBonds(True)

    # "None" is the interface default, i.e. no substituent SMARTS was given
    if smartsSubstituentsString != "None":
        ss = oechem.OESubSearch()
        if not ss.Init(smartsSubstituentsString):
            oechem.OEThrow.Fatal("Invalid SMARTS for subsearch.")
        options.SetSubstituentSearch(ss)

    if regionTypeString:
        options.SetRegionType(regionTypeString)

    # input structure(s) to transform
    ifsmols = oechem.oemolistream()
    if not ifsmols.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal(
            "Unable to open %s for reading" %
            itf.GetString("-i"))

    # save output structure(s) to this file
    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-o")):
        oechem.OEThrow.Fatal(
            "Unable to open %s for writing" %
            itf.GetString("-o"))
    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        oechem.OEThrow.Fatal(
            "Output file format does not support SD data: %s" %
            itf.GetString("-o"))

    irec = 0    # number of input records read
    ototal = 0  # number of annotated molecules written
    frag = oechem.OEGraphMol()
    for mol in ifsmols.GetOEGraphMols():
        irec += 1
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)
        regions = oemedchem.OEGetBemisMurcko(mol, options)
        if not regions.IsValid():
            # fall back to the record number for untitled molecules
            name = mol.GetTitle() or 'Record ' + str(irec)
            oechem.OEThrow.Warning("%s: no perceived regions" % name)
            continue
        for bmregion in regions:
            # create a fragment from the perceived region
            oechem.OESubsetMol(frag, mol, bmregion, True)
            if bUncolor:
                # ignore 3D stereo parities
                if frag.GetDimension() == 3:
                    frag.SetDimension(0)
                # uncolor the fragment
                oechem.OEUncolorMol(frag)
            smi = oechem.OEMolToSmiles(frag)
            # annotate the input molecule with the role information
            for role in bmregion.GetRoles():
                oechem.OEAddSDData(mol, role.GetName(), smi)
        ototal += 1
        oechem.OEWriteMolecule(ofs, mol)

    # release both files explicitly so the output is complete before the
    # summary below is reported
    ifsmols.close()
    ofs.close()

    if not irec:
        oechem.OEThrow.Fatal('No records in input structure file to perceive')

    if not ototal:
        oechem.OEThrow.Warning('No annotated structures generated')

    print("Input molecules={0:d}, output annotated {1:s}molecules={2:d}"
          .format(irec, ("(uncolored) " if bUncolor else ""), ototal))

    return 0


# Script entry point: run main() on the process arguments and use its
# return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)