OEChem Examples

Convert molecule files

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Program to convert from one molecule format to another
#############################################################################
import sys
from openeye.oechem import *


def main(argv=sys.argv):
    """Copy every molecule from the input file to the output file.

    argv -- [program, infile, outfile]; any other length is reported
            through OEThrow.Usage.

    A file that cannot be opened is reported through OEThrow.Fatal.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    reader = oemolistream()
    in_name = argv[1]
    if not reader.open(in_name):
        OEThrow.Fatal("Unable to open %s for reading" % in_name)

    writer = oemolostream()
    out_name = argv[2]
    if not writer.open(out_name):
        OEThrow.Fatal("Unable to open %s for writing" % out_name)

    # Stream the molecules straight through, one at a time.
    for molecule in reader.GetOEMols():
        OEWriteMolecule(writer, molecule)


# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Concatenating molecules

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2008-2015 OpenEye Scientific Software, Inc.
#############################################################################
# This program concatenates molecules into one file.
# It can be useful for generating ROCS queries or for reattaching ligands to
# a protein structure
#############################################################################
import sys
from openeye.oechem import *


def CatMols(infnames, outfname):
    """Merge all molecules from several files into one molecule and write it.

    infnames -- iterable of input file names, processed in the given order
    outfname -- name of the file that receives the combined molecule

    Each molecule read is appended to a single OEGraphMol with OEAddMols.
    The output file is opened only after all inputs have been read.  A file
    that cannot be opened is reported through OEThrow.Fatal.
    """
    combined = OEGraphMol()
    for path in infnames:
        reader = oemolistream()
        if not reader.open(path):
            OEThrow.Fatal("Unable to open %s for reading" % path)
        else:
            for piece in reader.GetOEGraphMols():
                OEAddMols(combined, piece)

    writer = oemolostream()
    if not writer.open(outfname):
        OEThrow.Fatal("Unable to open %s for writing" % outfname)

    OEWriteMolecule(writer, combined)


# Command-line definition handed to OEInterface in main():
#   -i / -in   required list of input file names
#   -o / -out  required output file name
Interface = """
!BRIEF -i <infile1> [<infile2>...] -o <outfile>
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !LIST true
  !REQUIRED true
  !BRIEF input file name(s)
!END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !REQUIRED true
  !BRIEF output file name
!END
"""


def main(argv=[__name__]):
    """Parse the command line and concatenate the input molecules.

    argv is parsed by OEInterface against the Interface definition; the
    "-i" list and the "-o" name are then passed on to CatMols.
    """
    options = OEInterface(Interface, argv)
    input_names = options.GetStringList("-i")
    output_name = options.GetString("-o")
    CatMols(input_names, output_name)

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Splitting multicomponent molecules

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2014 OpenEye Scientific Software, Inc.
#############################################################################
# Writes each component of a molecule as a separate molecule
#############################################################################

from __future__ import print_function
import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Write each component of every input molecule as its own molecule.

    argv -- [program, infile, outfile]; any other length is reported
            through OEThrow.Usage.

    A file that cannot be opened is reported through OEThrow.Fatal.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    reader = oemolistream()
    if not reader.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    writer = oemolostream()
    if not writer.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    for mol in reader.GetOEGraphMols():
        part_count, part_of_atom = OEDetermineComponents(mol)
        in_part = OEPartPredAtom(part_of_atom)

        # Parts are selected by the numbers 1..part_count.
        part = 1
        while part <= part_count:
            in_part.SelectPart(part)
            fragment = OEGraphMol()
            OESubsetMol(fragment, mol, in_part)
            OEWriteMolecule(writer, fragment)
            part += 1


# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Extract molecules by title

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Extract compound(s) from a file based on molecule title
#############################################################################
import sys

try:
    set()
except NameError:
    from sets import Set as set
from openeye.oechem import *


def MolExtract(ifs, ofs, nameset):
    """Copy the molecules whose title is in nameset from ifs to ofs.

    ifs     -- input molecule stream
    ofs     -- output molecule stream
    nameset -- container of titles; matching is an exact `in` test
    """
    for candidate in ifs.GetOEMols():
        if candidate.GetTitle() not in nameset:
            continue
        OEWriteMolecule(ofs, candidate)


def main(argv=[__name__]):
    """Extract the molecules whose titles were requested on the command line.

    Exactly one of "-title" (a single title) or "-list" (a text file with
    one title per line) must be given, together with the input and output
    molecule files.  Usage errors are reported through OEThrow.Usage, and
    unopenable files or an empty title set through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)
    haslist = itf.HasString("-list")
    hastitle = itf.HasString("-title")
    if not (haslist ^ hastitle):
        OEThrow.Usage("Must give either -list or -title")

    ifs = oemolistream()
    if not ifs.open(itf.GetString("-i")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    ofs = oemolostream()
    if not ofs.open(itf.GetString("-o")):
        OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    # collect names
    nameset = set()
    if haslist:
        listname = itf.GetString("-list")
        try:
            lfs = open(listname)
        except IOError:
            OEThrow.Fatal("Unable to open %s for reading" % listname)
        # Close the list file as soon as it has been read instead of leaving
        # the handle open for the rest of the run.
        with lfs:
            for line in lfs:
                name = line.strip()
                # A blank line is not a title; adding "" would make every
                # untitled molecule match.
                if name:
                    nameset.add(name)
    elif hastitle:
        nameset.add(itf.GetString("-title"))

    if len(nameset) == 0:
        OEThrow.Fatal("No titles requested")

    MolExtract(ifs, ofs, nameset)

# Command-line definition handed to OEInterface in main():
#   -i / -in     required input file name (keyless position 1)
#   -o / -out    required output file name (keyless position 2)
#   -title / -t  single molecule title to extract
#   -list / -l   file containing the titles to extract
InterfaceData = """\
!BRIEF -title title | -list <moltitles.file> [-i] <input> [-o] <output>
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !REQUIRED true
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -title
  !ALIAS -t
  !TYPE string
  !BRIEF Single mol title to extract
!END
!PARAMETER -list
  !ALIAS -l
  !TYPE string
  !BRIEF List file of mol titles to extract
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Write out unique molecules

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Read molecules and write out the unique ones. Two molecules are considered
# identical if their canonical isomeric smiles are identical.
#############################################################################
import sys
from openeye.oechem import *


def UniqMol(ifs, ofs):
    """Write each molecule from ifs to ofs unless its SMILES was seen before.

    Molecules are compared by the string returned from OEMolToSmiles; the
    first molecule with a given string is written, later ones are skipped.
    """
    seen = set()

    for mol in ifs.GetOEMols():
        key = OEMolToSmiles(mol)
        if key in seen:
            continue
        seen.add(key)
        OEWriteMolecule(ofs, mol)


def main(argv=[__name__]):
    """Open the input and output files and write the unique molecules.

    argv -- [program, infile, outfile]; any other length is reported
            through OEThrow.Usage.

    A file that cannot be opened is reported through OEThrow.Fatal.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    source = oemolistream()
    if not source.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    sink = oemolostream()
    if not sink.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    UniqMol(source, sink)


# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Read molecules and write out the unique ones. Two molecules are considered
# identical if their InChIs are identical.
#############################################################################
import sys
from openeye.oechem import *


def UniqInChI(ifs, ofs):
    """Write each molecule from ifs to ofs unless its InChI was seen before.

    Molecules are compared by the string returned from OECreateInChI; the
    first molecule with a given string is written, later ones are skipped.
    """
    seen = set()

    for mol in ifs.GetOEMols():
        key = OECreateInChI(mol)
        if key in seen:
            continue
        seen.add(key)
        OEWriteMolecule(ofs, mol)


def main(argv=[__name__]):
    """Open the input and output files and write the InChI-unique molecules.

    argv -- [program, infile, outfile]; any other length is reported
            through OEThrow.Usage.

    A file that cannot be opened is reported through OEThrow.Fatal.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    source = oemolistream()
    if not source.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    sink = oemolostream()
    if not sink.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    UniqInChI(source, sink)

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Randomize atoms of molecules

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Demonstrates how to randomly reorder atoms and bonds of a molecule
#############################################################################
import sys
from openeye.oechem import *


def OrderMolecules(ifs, ofs):
    """Scramble every molecule read from ifs and write it to ofs.

    Each molecule is passed through OEScrambleMolecule (in place) before it
    is written.
    """
    molecules = ifs.GetOEGraphMols()
    for scrambled in molecules:
        OEScrambleMolecule(scrambled)
        OEWriteMolecule(ofs, scrambled)


def main(argv=[__name__]):
    """Open the input and output files and scramble the molecules.

    argv -- [program, infile, outfile]; any other length is reported
            through OEThrow.Usage.

    A file that cannot be opened is reported through OEThrow.Fatal.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    source = oemolistream()
    if not source.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    sink = oemolostream()
    if not sink.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    OrderMolecules(source, sink)


# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Generate canonical smiles

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Generate canonical smiles of various flavors
#############################################################################
import sys
from openeye.oechem import *

#############################################################################
# To create unique Kekule smiles, must reperceive bond orders from
# scratch to avoid arbitrary non-deterministic variations, e.g.,
# CC1=C(O)C=CC=C1 vs. CC1=CC=CC=C1O
# This is why OESMILESFlag_Kekule is not sufficient and not used.
#############################################################################


def CanSmi(mol, isomeric, kekule):
    """Return a canonical SMILES string for mol.

    mol      -- molecule to encode; it is modified in place (ring and
                aromaticity perception and, for Kekule output, atom/bond
                reordering and kekulization)
    isomeric -- when true, OESMILESFlag_ISOMERIC is added to the flags
    kekule   -- when true, aromaticity is cleared and the molecule is
                kekulized before the SMILES is created
    """
    flags = OESMILESFlag_Canonical
    if isomeric:
        flags = flags | OESMILESFlag_ISOMERIC

    OEFindRingAtomsAndBonds(mol)
    OEAssignAromaticFlags(mol, OEAroModel_OpenEye)

    if kekule:
        # Tag the aromatic bonds with integer type 5 before the aromatic
        # flags are cleared (presumably this is what OEKekulize keys on --
        # confirm against the OEKekulize documentation).
        aromatic_bonds = mol.GetBonds(OEIsAromaticBond())
        for arom in aromatic_bonds:
            arom.SetIntType(5)
        # Canonical ordering comes first so the Kekule form is reperceived
        # from scratch rather than inherited from the input.
        OECanonicalOrderAtoms(mol)
        OECanonicalOrderBonds(mol)
        OEClearAromaticFlags(mol)
        OEKekulize(mol)

    return OECreateSmiString(mol, flags)


def main(argv=[__name__]):
    """Write one canonical SMILES line per input molecule.

    Options are read through OEInterface: "-isomeric", "-kekule" and
    "-from3d" select the SMILES flavor ("-from3d" implies isomeric), "-i"
    names the input molecule file and the optional "-o" names a text output
    file; without "-o" the lines go to sys.stdout.  Each line is the SMILES,
    followed by a space and the molecule title when the title is non-empty.
    Unopenable files are reported through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    isomeric = itf.GetBool("-isomeric")
    kekule = itf.GetBool("-kekule")
    from3d = itf.GetBool("-from3d")

    if from3d:
        isomeric = True

    ifs = oemolistream()
    ifile = itf.GetString("-i")
    if not ifs.open(ifile):
        OEThrow.Fatal("Unable to open %s for reading" % ifile)

    if itf.HasString("-o"):
        ofile = itf.GetString("-o")
        try:
            ofs = open(ofile, 'w')
        # Only I/O failures mean "cannot open"; a bare except would also
        # swallow KeyboardInterrupt and programming errors.
        except (IOError, OSError):
            OEThrow.Fatal("Unable to open %s for writing" % ofile)
    else:
        ofs = sys.stdout

    try:
        mol = OEGraphMol()
        while OEReadMolecule(ifs, mol):
            if from3d:
                OE3DToInternalStereo(mol)
            smi = CanSmi(mol, isomeric, kekule)
            if mol.GetTitle():
                smi += (" %s" % mol.GetTitle())
            ofs.write("%s\n" % smi)
    finally:
        # Flush and release a file we opened ourselves; stdout is left alone.
        if ofs is not sys.stdout:
            ofs.close()

# Command-line definition handed to OEInterface in main():
#   -i / -in   required input file name (keyless position 1)
#   -o / -out  optional output file name (keyless position 2)
#   -isomeric, -from3d, -kekule  boolean switches, all false by default
InterfaceData = """\
!BRIEF [options] [-i] <input> [[-o] <output>]
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !BRIEF output file name
  !KEYLESS 2
!END
!PARAMETER -isomeric
  !TYPE bool
  !DEFAULT false
  !BRIEF generate isomeric smiles
!END
!PARAMETER -from3d
  !TYPE bool
  !DEFAULT false
  !BRIEF perceive stereo from 3D coords
!END
!PARAMETER -kekule
  !TYPE bool
  !DEFAULT false
  !BRIEF generate kekule form
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Filter molecules by weight or heavy atom count

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Filter out molecules by their molecular weight or heavy atom count
#############################################################################
import sys
from openeye.oechem import *


def IsBetween(min, max, val):
    """Return True when val lies in the closed interval [min, max]."""
    return min <= val <= max


def IsMoleculeInHeavyAtomCountRange(min, max, mol):
    """Tell whether mol's heavy-atom count is within [min, max] inclusive.

    The count is taken with OECount(mol, OEIsHeavy()).
    """
    return IsBetween(min, max, OECount(mol, OEIsHeavy()))


def IsMoleculeInMolWtRange(min, max, mol):
    """Tell whether mol's molecular weight is within [min, max] inclusive.

    The weight is taken from OECalculateMolecularWeight(mol).
    """
    return IsBetween(min, max, OECalculateMolecularWeight(mol))


def main(argv=[__name__]):
    """Write the molecules that pass the heavy-atom and weight filters.

    Bounds come from "-minhac"/"-maxhac" (int) and "-minwt"/"-maxwt"
    (double); a bound that is not given defaults to minus or plus infinity,
    so it never rejects a molecule.  The output stream is created with
    ".ism" and is opened on a file only when "-o" is given.  Unopenable
    files are reported through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    reader = oemolistream()
    if not reader.open(itf.GetString("-i")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    writer = oemolostream(".ism")
    if itf.HasString("-o"):
        if not writer.open(itf.GetString("-o")):
            OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    unbounded_low = float("-inf")
    unbounded_high = float("inf")
    lo_hac = itf.GetInt("-minhac") if itf.HasInt("-minhac") else unbounded_low
    hi_hac = itf.GetInt("-maxhac") if itf.HasInt("-maxhac") else unbounded_high
    lo_wt = itf.GetDouble("-minwt") if itf.HasDouble("-minwt") else unbounded_low
    hi_wt = itf.GetDouble("-maxwt") if itf.HasDouble("-maxwt") else unbounded_high

    for mol in reader.GetOEMols():
        # The weight test is evaluated only when the atom-count test passes.
        if (IsMoleculeInHeavyAtomCountRange(lo_hac, hi_hac, mol) and
                IsMoleculeInMolWtRange(lo_wt, hi_wt, mol)):
            OEWriteMolecule(writer, mol)

# Command-line definition handed to OEInterface in main():
#   -i               required input file name (keyless position 1)
#   -o               optional output file name (keyless position 2)
#   -minhac/-maxhac  optional integer bounds on the heavy atom count
#   -minwt/-maxwt    optional double bounds on the molecular weight
InterfaceData = """
!BRIEF [-minhac <num>] [-maxhac <num>] [-minwt <num>] [-maxwt <num>] [-i] <input> [[-o] <output>]
!PARAMETER -i
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !TYPE string
  !REQUIRED false
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -minhac
  !TYPE int
  !REQUIRED false
  !BRIEF minimum heavy atom count
!END
!PARAMETER -maxhac
  !TYPE int
  !REQUIRED false
  !BRIEF maximum heavy atom count
!END
!PARAMETER -minwt
  !TYPE double
  !REQUIRED false
  !BRIEF minimum molecular weight
!END
!PARAMETER -maxwt
  !TYPE double
  !REQUIRED false
  !BRIEF maximum molecular weight
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Strip salts

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Remove salts and/or remove all but the largest molecule
#############################################################################
import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Strip salts from every input molecule and write the result.

    argv -- [program, infile, outfile]; any other length is reported
            through OEThrow.Usage.

    Each molecule is passed through OETheFunctionFormerlyKnownAsStripSalts
    (in place) before being written.  A file that cannot be opened is
    reported through OEThrow.Fatal.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    source = oemolistream()
    if not source.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    sink = oemolostream()
    if not sink.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    for stripped in source.GetOEMols():
        OETheFunctionFormerlyKnownAsStripSalts(stripped)
        OEWriteMolecule(sink, stripped)


# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Extract rings

#!/usr/bin/env python
#######################################################################
# Copyright (C) 2004-2015 OpenEye Scientific Software, Inc.
#######################################################################
# Extracting rings/ring systems from input molecules
#######################################################################
import sys
from openeye.oechem import *


def RingSubSet(ifs, ofs, exo):
    """Write the ring atoms of each molecule in ifs to ofs.

    ifs -- input molecule stream
    ofs -- output molecule stream
    exo -- when true, non-ring atoms double bonded to a ring are kept too

    The subset keeps the source title with "_rings" appended.  Molecules
    whose subset has no atoms are not written.
    """
    for mol in ifs.GetOEGraphMols():
        rings = OEGraphMol()
        if exo:
            ring_atom = OEAtomIsInRing()
            exo_atom = OEIsNonRingAtomDoubleBondedToRing()
            keep = OEOrAtom(ring_atom, exo_atom)
        else:
            keep = OEAtomIsInRing()
        # The final True is the adjustHcount argument of OESubsetMol.
        OESubsetMol(rings, mol, keep, True)
        rings.SetTitle(mol.GetTitle() + "_rings")
        if rings.NumAtoms() == 0:
            continue
        OEWriteMolecule(ofs, rings)


def main(argv=[__name__]):
    """Parse the command line and extract the rings of the input molecules.

    "-exo" selects whether exocyclic double-bonded atoms are kept, "-i"
    names the input file and the optional "-o" names the output file.  The
    output stream is created with ".ism" and is opened on a file only when
    "-o" is given.  Unopenable files are reported through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    keep_exo = itf.GetBool("-exo")

    reader = oemolistream()
    if not reader.open(itf.GetString("-i")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    writer = oemolostream(".ism")
    if itf.HasString("-o"):
        if not writer.open(itf.GetString("-o")):
            OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    RingSubSet(reader, writer, keep_exo)

# Command-line definition handed to OEInterface in main():
#   -i / -in   required input file name (keyless position 1)
#   -o / -out  optional output file name (keyless position 2)
#   -exo       boolean, default true
InterfaceData = """
!BRIEF [-exo] [-i] <infile> [[-o] <outfile>]
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !REQUIRED false
  !BRIEF output file name
  !KEYLESS 2
!END
!PARAMETER -exo
  !TYPE bool
  !DEFAULT true
  !BRIEF Include non-ring atoms double bonded to a ring
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Extract molecule scaffolds

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2009-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Extract the ring scaffold of a molecule
#############################################################################
from openeye.oechem import *
try:
    set()
except NameError:
    from sets import Set as set
import sys


def TraverseForRing(visited, atom):
    """Depth-first search from atom for a ring atom, avoiding visited atoms.

    visited -- set of atom indices that must not be entered; it is updated
               in place with every atom the search walks through
    atom    -- starting atom; it is marked visited but is not itself tested
               for ring membership

    Returns True as soon as an unvisited neighbor that is in a ring is
    reached, and False when the reachable atoms are exhausted.

    The search uses an explicit stack instead of recursion so that long
    acyclic chains cannot exceed Python's recursion limit.  Neighbors are
    examined in the same order as a recursive depth-first walk.
    """
    visited.add(atom.GetIdx())
    # Each stack entry is the partially consumed neighbor list of one atom on
    # the current path; the top entry belongs to the atom being expanded.
    pending = [iter(list(atom.GetAtoms()))]

    while pending:
        for nbor in pending[-1]:
            if nbor.GetIdx() in visited:
                continue
            if nbor.IsInRing():
                return True
            # Descend into this neighbor; its remaining siblings stay queued
            # in the iterator left on the stack.
            visited.add(nbor.GetIdx())
            pending.append(iter(list(nbor.GetAtoms())))
            break
        else:
            # All neighbors of the top atom are done: backtrack.
            pending.pop()

    return False


def DepthFirstSearchForRing(root, nbor):
    """Tell whether a ring atom can be reached from nbor without using root.

    root is marked as visited up front so the search that starts at nbor
    cannot walk back through it.
    """
    blocked = set([root.GetIdx()])
    return TraverseForRing(blocked, nbor)


class IsInScaffold(OEUnaryAtomPred):
    """Atom predicate selecting ring atoms and the atoms that link rings."""

    def __call__(self, atom):
        """Return True for a ring atom, or for a non-ring atom from which
        more than one neighbor leads to a ring atom."""
        if atom.IsInRing():
            return True

        # Count the neighbors whose side of the molecule reaches a ring.
        ring_branches = sum(1 for nbor in atom.GetAtoms()
                            if DepthFirstSearchForRing(atom, nbor))
        return ring_branches > 1


def main(argv=[__name__]):
    """Write the scaffold of every input molecule.

    The scaffold is the subset of atoms accepted by IsInScaffold; when
    "-exo" is true, non-ring atoms double bonded to a ring are accepted as
    well.  Subsets for which IsValid() is false are not written.
    Unopenable files are reported through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    include_exo = itf.GetBool("-exo")

    reader = oemolistream()
    if not reader.open(itf.GetString("-i")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    writer = oemolostream()
    if not writer.open(itf.GetString("-o")):
        OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    for src in reader.GetOEMols():
        scaffold = OEMol()
        keep = IsInScaffold()
        if include_exo:
            keep = OEOrAtom(keep, OEIsNonRingAtomDoubleBondedToRing())

        # The final True is the adjustHcount argument of OESubsetMol.
        OESubsetMol(scaffold, src, keep, True)

        if not scaffold.IsValid():
            continue
        OEWriteMolecule(writer, scaffold)

# Command-line definition handed to OEInterface in main():
#   -i / -in   required input file name (keyless position 1)
#   -o / -out  required output file name (keyless position 2)
#   -exo       boolean, default true; when true main() ORs the scaffold
#              predicate with OEIsNonRingAtomDoubleBondedToRing, i.e. the
#              exocyclic double-bonded atoms are INCLUDED in the scaffold.
InterfaceData = """
!BRIEF [-exo] [-i] <input> [-o] <scaffolds>
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !REQUIRED true
  !BRIEF output file name
  !KEYLESS 2
!END
!PARAMETER -exo
  !TYPE bool
  !DEFAULT true
  !BRIEF Include double bonds exo to ring in scaffold
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Extract random molecule subset

#!/usr/bin/env python
#############################################################################
#  Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
#  Randomly reorder molecules and optionally obtain a random subset
#############################################################################
import sys
from random import Random
from openeye.oechem import *


def LoadDatabase(ifs, mlist, count):
    """Append compressed copies of the molecules read from ifs to mlist.

    ifs   -- input molecule stream
    mlist -- list that receives the copies (modified in place)
    count -- stop after this many molecules have been loaded; a value that
             is never reached (such as 0) loads everything the stream yields
    """
    loaded = 0
    for mol in ifs.GetOEMols():
        stored = OEMol(mol, OEMCMolType_OEDBMCMol)
        stored.Compress()
        mlist.append(stored)
        loaded += 1
        if loaded == count:
            break


def WriteDatabase(ofs, mlist, size):
    """Write the first `size` molecules of mlist to ofs.

    Each molecule is uncompressed, written and then cleared, so the entries
    of mlist that were written are empty afterwards.
    """
    selected = mlist[:size]
    for entry in selected:
        entry.UnCompress()
        OEWriteMolecule(ofs, entry)
        entry.Clear()


def RandomizePercent(ifs, ofs, percent, rand):
    """Shuffle all molecules from ifs and write a percentage of them to ofs.

    ifs     -- input molecule stream; it is loaded completely into memory
    ofs     -- output molecule stream
    percent -- share of the molecules to write, on a 0-100 scale
    rand    -- random generator providing shuffle()

    The number written is truncated to an integer and is never requested
    below one.
    """
    pool = []
    # A count of 0 is never reached by LoadDatabase, so everything is loaded.
    LoadDatabase(ifs, pool, 0)

    rand.shuffle(pool)

    wanted = max(1, int(percent * 0.01 * len(pool)))
    WriteDatabase(ofs, pool, wanted)


def Randomize(ifs, ofs, rand):
    """Write all molecules from ifs to ofs in random order.

    This is RandomizePercent with the percentage fixed at 100.
    """
    RandomizePercent(ifs, ofs, 100, rand)


def RandomizeN(ifs, ofs, count, rand):
    """Write a random selection of `count` molecules from ifs to ofs.

    ifs   -- input molecule stream
    ofs   -- output molecule stream
    count -- number of molecules to keep
    rand  -- random generator providing random() and shuffle()

    The first `count` molecules fill the list.  Each later molecule replaces
    a randomly chosen entry with probability count / (number read so far),
    so only `count` compressed molecules are held at a time.  The kept
    molecules are shuffled before they are written.
    """
    kept = []
    LoadDatabase(ifs, kept, count)

    # The stream continues where LoadDatabase stopped; `extra` counts the
    # molecules read beyond the first `count`.
    for extra, mol in enumerate(ifs.GetOEMols()):
        keep_chance = count / float(count + extra + 1)
        if rand.random() < keep_chance:
            slot = int(float(count) * rand.random())
            replacement = OEMol(mol, OEMCMolType_OEDBMCMol)
            replacement.Compress()
            kept[slot] = replacement

    rand.shuffle(kept)
    WriteDatabase(ofs, kept, count)


def main(argv=[__name__]):
    """Randomly reorder the input molecules, optionally keeping a subset.

    "-n" keeps a fixed number of molecules and "-p" a percentage; giving
    both is reported through OEThrow.Usage, giving neither writes all
    molecules.  "-seed" seeds the random generator.  The output stream is
    created with ".ism" and is opened on a file only when "-o" is given.
    Unopenable files are reported through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    by_percent = itf.HasFloat("-p")
    by_count = itf.HasInt("-n")
    if by_percent and by_count:
        OEThrow.Usage("Give only one option, -p or -n")

    reader = oemolistream()
    if not reader.open(itf.GetString("-i")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    writer = oemolostream(".ism")
    if itf.HasString("-o"):
        if not writer.open(itf.GetString("-o")):
            OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    rand = Random(itf.GetInt("-seed")) if itf.HasInt("-seed") else Random()

    if by_count:
        RandomizeN(reader, writer, itf.GetInt("-n"), rand)
    elif by_percent:
        RandomizePercent(reader, writer, itf.GetFloat("-p"), rand)
    else:
        Randomize(reader, writer, rand)

# Command-line definition handed to OEInterface in main():
#   -i     required input file name (keyless position 1)
#   -o     optional output file name (keyless position 2)
#   -p     optional float, percentage of molecules to output
#   -n     optional int, number of molecules to output
#   -seed  optional int used to seed the random generator
InterfaceData = """
!BRIEF [-seed <int>] [-n <number>] [-p <percent>] [-i] <input> [-o] <output>
!PARAMETER -i
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !TYPE string
  !REQUIRED false
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -p
  !TYPE float
  !REQUIRED false
  !BRIEF Percentage of output molecules
!END
!PARAMETER -n
  !TYPE int
  !REQUIRED false
  !BRIEF Number of output molecules
!END
!PARAMETER -seed
  !TYPE int
  !REQUIRED false
  !BRIEF Integer value for random seed, default is system time
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Performing a reaction

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2017 OpenEye Scientific Software, Inc.
#############################################################################
# Perform reactions on the given compounds
#############################################################################
import sys
from openeye.oechem import *


def UniMolRxn(ifs, ofs, umr):
    """Apply umr to each molecule from ifs and write the successful ones.

    ifs -- input molecule stream
    ofs -- output molecule stream
    umr -- callable applied to each molecule; the molecule is written only
           when the call returns a true value
    """
    for mol in ifs.GetOEGraphMols():
        if not umr(mol):
            continue
        OEWriteMolecule(ofs, mol)


def main(argv=[__name__]):
    """Apply a SMIRKS reaction to every molecule of the input file.

    argv -- [program, SMIRKS, infile] with an optional fourth outfile
            element; any other length is reported through OEThrow.Usage.

    The output stream is created with ".ism" and is opened on a file only
    when an outfile is given.  An unparsable SMIRKS, a failed reaction
    initialization or an unopenable file is reported through OEThrow.Fatal.
    """
    if len(argv) < 3 or len(argv) > 4:
        OEThrow.Usage("%s SMIRKS <infile> [<outfile>]" % argv[0])

    reaction = OEQMol()
    if not OEParseSmirks(reaction, argv[1]):
        OEThrow.Fatal("Unable to parse SMIRKS: %s" % argv[1])

    umr = OEUniMolecularRxn()
    if not umr.Init(reaction):
        OEThrow.Fatal("Failed to initialize reaction with %s SMIRKS" % argv[1])
    umr.SetClearCoordinates(True)

    reader = oemolistream()
    if not reader.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[2])

    writer = oemolostream(".ism")
    has_outfile = len(argv) == 4
    if has_outfile:
        if not writer.open(argv[3]):
            OEThrow.Fatal("Unable to open %s for writing" % argv[3])

    UniMolRxn(reader, writer, umr)

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Library generation

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2017 OpenEye Scientific Software, Inc.
#############################################################################
# Perform library generation with SMIRKS
#############################################################################
import sys
from openeye.oechem import *


def LibGen(libgen, ofs, unique, isomeric):
    """Write the products of libgen to ofs, optionally dropping duplicates.

    libgen   -- initialized library generator providing GetProducts()
    ofs      -- output molecule stream
    unique   -- when true, a product is written only the first time its
                SMILES string is seen
    isomeric -- when true, OESMILESFlag_ISOMERIC is added to the flags used
                for the duplicate check
    """
    smiflag = OESMILESFlag_DEFAULT  # Canonical|AtomMaps|Rgroup
    if isomeric:
        smiflag |= OESMILESFlag_ISOMERIC

    # access products
    # A set gives constant-time duplicate checks; SMILES strings are only
    # created and remembered when uniqueness was requested.
    seen = set()
    for mol in libgen.GetProducts():
        if unique:
            smiles = OECreateSmiString(mol, smiflag)
            if smiles in seen:
                continue
            seen.add(smiles)
        OEWriteMolecule(ofs, mol)


def main(argv=[__name__]):
    """Generate a product library from a reaction and reactant files.

    The reaction comes from exactly one of "-smirks" (SMIRKS string) or
    "-rxn" (MDL reaction file).  Each "-reactants" file supplies the
    starting materials for the reactant at the same position, and the number
    of files must equal the number of reactants in the reaction.  Products
    go to the "-product" file when given; otherwise the output stream
    created with ".smi" is used as is.  All failures are reported through
    OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    if not itf.HasString("-smirks") and not itf.HasString("-rxn"):
        OEThrow.Fatal("Please provide SMIRKS string or MDL reaction file")

    if itf.HasString("-smirks") and itf.HasString("-rxn"):
        OEThrow.Fatal("Please provide only SMIRKS string or MDL reaction file")

    reaction = OEQMol()
    if itf.HasString("-smirks"):
        smirks = itf.GetString("-smirks")
        if not OEParseSmirks(reaction, smirks):
            OEThrow.Fatal("Unable to parse SMIRKS: %s" % smirks)
    else:
        rxn = itf.GetString("-rxn")
        rfile = oemolistream(rxn)
        opt = OEMDLQueryOpts_ReactionQuery | OEMDLQueryOpts_SuppressExplicitH
        if not OEReadMDLReactionQueryFile(rfile, reaction, opt):
            OEThrow.Fatal("Unable to read reaction file: %s" % rxn)

    relax = itf.GetBool("-relax")
    unique = itf.GetBool("-unique")
    implicitH = itf.GetBool("-implicitH")
    valcorrect = itf.GetBool("-valence")
    isomeric = itf.GetBool("-isomeric")

    libgen = OELibraryGen()
    # "-relax" is passed inverted as the second argument of Init.
    if not libgen.Init(reaction, not relax):
        OEThrow.Fatal("failed to initialize library generator")
    libgen.SetValenceCorrection(valcorrect)
    libgen.SetExplicitHydrogens(not implicitH)
    libgen.SetClearCoordinates(True)

    ofs = oemolostream(".smi")
    if itf.HasString("-product"):
        product = itf.GetString("-product")
        # Fail loudly instead of silently discarding the products when the
        # output file cannot be opened.
        if not ofs.open(product):
            OEThrow.Fatal("Unable to open %s for writing" % product)

    nrReacts = 0
    while itf.HasString("-reactants", nrReacts):
        fileName = itf.GetString("-reactants", nrReacts)
        if nrReacts >= libgen.NumReactants():
            OEThrow.Fatal("Number of reactant files exceeds number of reactants specified in reaction")
        ifs = oemolistream()
        if not ifs.open(fileName):
            OEThrow.Fatal("Unable to read %s reactant file" % fileName)
        for mol in ifs.GetOEGraphMols():
            libgen.AddStartingMaterial(mol, nrReacts, unique)
        nrReacts += 1

    if nrReacts != libgen.NumReactants():
        OEThrow.Fatal("Reactions requires %d reactant files!" % libgen.NumReactants())
    LibGen(libgen, ofs, unique, isomeric)


# Command-line definition handed to OEInterface in main(), in three
# categories:
#   input/output:  -smirks/-s, -rxn, -reactants/-r (required list),
#                  -product/-p
#   OELibraryGen:  -relax, -implicitH, -valence (booleans, default false)
#   product smiles: -unique, -isomeric (booleans, default false)
InterfaceData = """
!BRIEF [options] [-smirks <string> | -rxn <rfile>] -reactants <infile> [-product <outfile>]
!CATEGORY "input/output options"

  !PARAMETER -smirks
    !ALIAS -s
    !TYPE string
    !VISIBILITY simple
    !BRIEF SMIRKS reaction string
  !END

  !PARAMETER -rxn
    !TYPE string
    !VISIBILITY simple
    !BRIEF MDL reaction file
  !END

  !PARAMETER -reactants
    !ALIAS -r
    !TYPE string
    !LIST true
    !REQUIRED true
    !VISIBILITY simple
    !BRIEF list of input reactant filenames
  !END

  !PARAMETER -product
    !ALIAS -p
    !TYPE string
    !REQUIRED false
    !VISIBILITY simple
    !BRIEF output product filename
  !END
!END

!CATEGORY "OELibraryGen options"

  !PARAMETER -relax
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !VISIBILITY simple
    !BRIEF unmapped atoms on reactant side are not deleted during reaction
  !END

  !PARAMETER -implicitH
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !VISIBILITY simple
    !BRIEF reaction will be perfomed using implicit hydrogens
  !END

  !PARAMETER -valence
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !VISIBILITY simple
    !BRIEF automatic valence correction will be applied
  !END

!END

!CATEGORY "product smiles generation options"

  !PARAMETER -unique
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !VISIBILITY simple
    !BRIEF only unique product canonical smiles will be written
  !END

  !PARAMETER -isomeric
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !VISIBILITY simple
    !BRIEF include atom and bond stereochemistry in product smiles string
  !END

!END
"""
# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Perform substructure searches

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Perform substructure search on molecule file
#############################################################################
from __future__ import print_function
import sys
from openeye.oechem import *


def SubSearch(itf, ss, ifs, ofs):
    """Search every molecule from ifs with ss and write or count the hits.

    itf -- parsed interface; the booleans "-r" (reverse the match logic) and
           "-c" (count instead of writing) are read from it
    ss  -- initialized substructure search object
    ifs -- input molecule stream
    ofs -- output molecule stream, used only when "-c" is false

    A molecule is selected when the result of ss.SingleMatch differs from
    the "-r" flag.  With "-c" the number of selected molecules is printed
    on a single line.
    """
    reverseflag = itf.GetBool("-r")
    countflag = itf.GetBool("-c")
    count = 0
    for mol in ifs.GetOEGraphMols():
        OEPrepareSearch(mol, ss)
        if ss.SingleMatch(mol) != reverseflag:
            if countflag:
                count += 1
            else:
                OEWriteMolecule(ofs, mol)
    if countflag:
        # print() supplies the newline itself; the previous explicit "\n"
        # combined with end=" " left a stray space after the line.
        print("%d matching molecules" % count)


def main(argv=[__name__]):
    """Run a SMARTS substructure search over a molecule file.

    Exactly one of counting ("-c") or an output file ("-o") must be
    requested.  "-i" names the input file and "-p" holds the SMARTS
    pattern.  A violated option rule, an unopenable file or an unparsable
    SMARTS is reported through OEThrow.Fatal.
    """
    itf = OEInterface(InterfaceData, argv)

    count_only = itf.GetBool("-c")
    if not (count_only ^ itf.HasString("-o")):
        OEThrow.Fatal("Counting (-c) or output (-o) must be specified and are mutually exclusive.")

    in_name = itf.GetString("-i")
    reader = oemolistream()
    if not reader.open(in_name):
        OEThrow.Fatal("Unable to open %s for reading" % in_name)

    # The output stream is opened on a file only when molecules are written.
    writer = oemolostream()
    if not count_only:
        out_name = itf.GetString("-o")
        if not writer.open(out_name):
            OEThrow.Fatal("Unable to open %s for writing" % out_name)

    pattern = itf.GetString("-p")
    search = OESubSearch()
    if not search.Init(pattern):
        OEThrow.Fatal("Unable to parse SMARTS: %s" % pattern)

    SubSearch(itf, search, reader, writer)

# Command-line definition handed to OEInterface in main():
#   -i / -in   required input file name (keyless position 1)
#   -p         required SMARTS pattern
#   -o / -out  output file name (keyless position 2)
#   -r / -v    boolean, default false: select the molecules NOT matched
#   -c         boolean, default false: only print the number of matches
InterfaceData = """
!BRIEF  [-r][-c] -p smarts [-i] <input> [[-o] <output>]
!PARAMETER -i 1
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -p 2
  !TYPE string
  !REQUIRED true
  !BRIEF SMARTS pattern, quote for safety
!END
!PARAMETER -o 3
  !ALIAS -out
  !TYPE string
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -r 4
  !ALIAS -v
  !TYPE bool
  !DEFAULT false
  !BRIEF Reverse logic, not matched
!END
!PARAMETER -c 5
  !TYPE bool
  !DEFAULT false
  !BRIEF Just output count of number matched
!END
"""

# Script entry point: run main() on the process arguments; main()'s return
# value is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Align molecules by maximum common substructure

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Align two compounds based on the maximum common substructure
#############################################################################
import sys
from openeye.oechem import *


def MCSAlign(refmol, fitmol, ofs):
    """Overlay ``fitmol`` onto ``refmol`` for every unique MCS match.

    An exhaustive maximum-common-substructure search is initialised from
    ``refmol`` (atoms compared by atomic number and aromaticity, no bond
    expression).  For each unique match found in ``fitmol`` the overlay
    transform is computed with OERMSD, applied to ``fitmol`` in place and
    the transformed molecule is written to ``ofs``.

    refmol -- reference molecule the pattern is built from
    fitmol -- molecule that is moved; modified in place
    ofs    -- output molecule stream
    """
    search = OEMCSSearch(OEMCSType_Exhaustive)
    search.Init(refmol, OEExprOpts_AtomicNumber | OEExprOpts_Aromaticity, 0)
    search.SetMCSFunc(OEMCSMaxBondsCompleteCycles())

    # Buffers that OERMSD fills with the rotation matrix (9 values) and the
    # translation vector (3 values); they are reused for every match.
    rotation = OEDoubleArray(9)
    translation = OEDoubleArray(3)

    for hit in search.Match(fitmol, True):
        rms = OERMSD(search.GetPattern(), fitmol, hit, True, rotation, translation)
        if rms < 0.0:
            # A negative value is treated as an overlay failure; skip it.
            OEThrow.Warning("RMS overlay failure")
        else:
            OERotate(fitmol, rotation)
            OETranslate(fitmol, translation)
            OEWriteMolecule(ofs, fitmol)


def main(argv=[__name__]):
    """Align every molecule of a fit file onto a reference by MCS.

    Expects ``<refmol> <fitmol> <outfile>``.  The first molecule of the
    reference file is read and written to the output first; afterwards each
    3D molecule of the fit file is passed to ``MCSAlign``.

    argv -- command-line arguments, program name first.
    """
    if len(argv) != 4:
        OEThrow.Usage("%s <refmol> <fitmol> <outfile>" % argv[0])
    ref_name, fit_name, out_name = argv[1], argv[2], argv[3]

    # Reference: must come from a 3D-capable format and carry 3D coordinates.
    reffs = oemolistream()
    if not reffs.open(ref_name):
        OEThrow.Fatal("Unable to open %s for reading" % ref_name)
    if not OEIs3DFormat(reffs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 3D coordinates")
    refmol = OEGraphMol()
    if not OEReadMolecule(reffs, refmol):
        OEThrow.Fatal("Unable to read molecule in %s" % ref_name)
    if refmol.GetDimension() != 3:
        OEThrow.Fatal("%s doesn't have 3D coordinates" % refmol.GetTitle())

    # Fit molecules: only the file format is validated up front.
    fitfs = oemolistream()
    if not fitfs.open(fit_name):
        OEThrow.Fatal("Unable to open %s for reading" % fit_name)
    if not OEIs3DFormat(fitfs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 3D coordinates")

    ofs = oemolostream()
    if not ofs.open(out_name):
        OEThrow.Fatal("Unable to open %s for writing" % out_name)
    if not OEIs3DFormat(ofs.GetFormat()):
        OEThrow.Fatal("Invalid output format: need 3D coordinates")

    # The reference is written before its hydrogens are suppressed.
    OEWriteConstMolecule(ofs, refmol)
    OESuppressHydrogens(refmol)

    for fitmol in fitfs.GetOEGraphMols():
        if fitmol.GetDimension() == 3:
            MCSAlign(refmol, fitmol, ofs)
        else:
            OEThrow.Warning("%s doesn't have 3D coordinates" % fitmol.GetTitle())

if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Align molecules by clique match

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Align two compounds based on the clique match
#############################################################################
import sys
from openeye.oechem import *


def CliqueAlign(refmol, fitmol, ofs):
    """Overlay ``fitmol`` onto ``refmol`` for every clique-search match.

    A clique search is built from ``refmol`` using the default atom and
    bond expressions, with a save range of 5 and at least 6 matched atoms.
    For each match in ``fitmol`` the overlay transform is computed with
    OERMSD, applied to ``fitmol`` in place and the result written to ``ofs``.

    refmol -- reference molecule the pattern is built from
    fitmol -- molecule that is moved; modified in place
    ofs    -- output molecule stream
    """
    cs = OECliqueSearch(refmol, OEExprOpts_DefaultAtoms, OEExprOpts_DefaultBonds)
    cs.SetSaveRange(5)
    cs.SetMinAtoms(6)

    overlay = True
    for mi in cs.Match(fitmol):
        rmat = OEDoubleArray(9)
        trans = OEDoubleArray(3)
        rms = OERMSD(cs.GetPattern(), fitmol, mi, overlay, rmat, trans)
        if rms < 0.0:
            # Same failure handling as the MCS alignment example: do not
            # apply a transform that OERMSD could not compute.
            OEThrow.Warning("RMS overlay failure")
            continue
        OERotate(fitmol, rmat)
        OETranslate(fitmol, trans)
        OEWriteMolecule(ofs, fitmol)


def main(argv=[__name__]):
    """Align every molecule of a fit file onto a reference by clique match.

    Expects ``<refmol> <fitmol> <outfile>``.  The first molecule of the
    reference file is read and written to the output first; afterwards each
    3D molecule of the fit file is passed to ``CliqueAlign``.

    argv -- command-line arguments, program name first.
    """
    if len(argv) != 4:
        OEThrow.Usage("%s <refmol> <fitmol> <outfile>" % argv[0])
    ref_path, fit_path, out_path = argv[1], argv[2], argv[3]

    # Reference molecule, validated for 3D format and 3D coordinates.
    reffs = oemolistream()
    if not reffs.open(ref_path):
        OEThrow.Fatal("Unable to open %s for reading" % ref_path)
    if not OEIs3DFormat(reffs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 3D coordinates")
    refmol = OEGraphMol()
    if not OEReadMolecule(reffs, refmol):
        OEThrow.Fatal("Unable to read molecule in %s" % ref_path)
    if refmol.GetDimension() != 3:
        OEThrow.Fatal("%s doesn't have 3D coordinates" % refmol.GetTitle())

    # Stream of molecules to be fitted.
    fitfs = oemolistream()
    if not fitfs.open(fit_path):
        OEThrow.Fatal("Unable to open %s for reading" % fit_path)
    if not OEIs3DFormat(fitfs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 3D coordinates")

    ofs = oemolostream()
    if not ofs.open(out_path):
        OEThrow.Fatal("Unable to open %s for writing" % out_path)
    if not OEIs3DFormat(ofs.GetFormat()):
        OEThrow.Fatal("Invalid output format: need 3D coordinates")

    # Write the untouched reference, then drop its hydrogens for matching.
    OEWriteConstMolecule(ofs, refmol)
    OESuppressHydrogens(refmol)

    for fitmol in fitfs.GetOEGraphMols():
        if fitmol.GetDimension() == 3:
            CliqueAlign(refmol, fitmol, ofs)
        else:
            OEThrow.Warning("%s doesn't have 3D coordinates" % fitmol.GetTitle())

if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Align molecules by SMARTS

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Align two compounds based on smarts match
#############################################################################
import sys
from openeye.oechem import *


def SmartsAlign(refmol, fitmol, ss, ofs):
    """Overlay ``fitmol`` onto ``refmol`` using SMARTS matches of both.

    For every pair of unique matches of ``ss`` in ``refmol`` and in
    ``fitmol`` an atom correspondence is built from the matched target
    atoms, the overlay transform is computed with OERMSD, applied to
    ``fitmol`` in place and the result written to ``ofs``.

    refmol -- reference molecule
    fitmol -- molecule that is moved; modified in place
    ss     -- initialised substructure search
    ofs    -- output molecule stream
    """
    unique = True
    overlay = True
    for match1 in ss.Match(refmol, unique):
        for match2 in ss.Match(fitmol, unique):
            # Pair up the target atoms of both matches in pattern order.
            match = OEMatch()
            for mp1, mp2 in zip(match1.GetAtoms(), match2.GetAtoms()):
                match.AddPair(mp1.target, mp2.target)

            rmat = OEDoubleArray(9)
            trans = OEDoubleArray(3)
            rms = OERMSD(refmol, fitmol, match, overlay, rmat, trans)
            if rms < 0.0:
                # Same failure handling as the MCS alignment example: do
                # not apply a transform that OERMSD could not compute.
                OEThrow.Warning("RMS overlay failure")
                continue
            OERotate(fitmol, rmat)
            OETranslate(fitmol, trans)
            OEWriteConstMolecule(ofs, fitmol)


def main(argv=[__name__]):
    """Align every molecule of a fit file onto a reference by SMARTS match.

    Expects ``<refmol> <fitmol> <outfile> <smarts>``.  The reference is
    written to the output first.  The SMARTS must match the reference;
    fit molecules without 3D coordinates or without a match are skipped
    with a warning, the others are passed to ``SmartsAlign``.

    argv -- command-line arguments, program name first.
    """
    if len(argv) != 5:
        OEThrow.Usage("%s <refmol> <fitmol> <outfile> <smarts>" % argv[0])
    ref_name, fit_name, out_name, smarts = argv[1], argv[2], argv[3], argv[4]

    # Reference molecule, validated for 3D format and 3D coordinates.
    reffs = oemolistream()
    if not reffs.open(ref_name):
        OEThrow.Fatal("Unable to open %s for reading" % ref_name)
    if not OEIs3DFormat(reffs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 3D coordinates")
    refmol = OEGraphMol()
    if not OEReadMolecule(reffs, refmol):
        OEThrow.Fatal("Unable to read molecule in %s" % ref_name)
    if refmol.GetDimension() != 3:
        OEThrow.Fatal("%s doesn't have 3D coordinates" % refmol.GetTitle())

    fitfs = oemolistream()
    if not fitfs.open(fit_name):
        OEThrow.Fatal("Unable to open %s for reading" % fit_name)
    if not OEIs3DFormat(fitfs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 3D coordinates")

    ofs = oemolostream()
    if not ofs.open(out_name):
        OEThrow.Fatal("Unable to open %s for writing" % out_name)
    if not OEIs3DFormat(ofs.GetFormat()):
        OEThrow.Fatal("Invalid output format: need 3D coordinates")

    OEWriteConstMolecule(ofs, refmol)

    ss = OESubSearch()
    if not ss.Init(smarts):
        OEThrow.Fatal("Unable to parse SMARTS: %s" % smarts)

    # The pattern has to hit the reference, otherwise nothing can be aligned.
    OEPrepareSearch(refmol, ss)
    if not ss.SingleMatch(refmol):
        OEThrow.Fatal("SMARTS fails to match refmol")

    for fitmol in fitfs.GetOEGraphMols():
        if fitmol.GetDimension() != 3:
            OEThrow.Warning("%s doesn't have 3D coordinates" % fitmol.GetTitle())
            continue
        OEPrepareSearch(fitmol, ss)
        if ss.SingleMatch(fitmol):
            SmartsAlign(refmol, fitmol, ss, ofs)
        else:
            OEThrow.Warning("SMARTS fails to match fitmol %s" % fitmol.GetTitle())


if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Align multi-conformer molecules

#!/usr/bin/env python
#############################################################################
#  Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Performing RMSD calculation between a 3D reference molecule and
# multi-conformation molecules
#############################################################################
import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Compute per-conformer RMSDs against a reference molecule.

    Reads the reference from ``-ref`` and multi-conformer molecules from
    ``-in``.  For each molecule OERMSD is run over all conformers; the
    RMSD of every conformer is reported through ``OEThrow.Info`` and, when
    ``-overlay`` is true, each conformer is rotated and translated with the
    transform OERMSD produced.  Molecules are written when ``-out`` is set.

    argv -- command-line arguments, program name first.
    Returns 0.
    """
    itf = OEInterface(InterfaceData, argv)

    # Without -verbose only warnings and above are shown, which hides the
    # Info messages emitted below.
    if not itf.GetBool("-verbose"):
        OEThrow.SetLevel(OEErrorLevel_Warning)

    rfname = itf.GetString("-ref")
    ifname = itf.GetString("-in")

    automorph = itf.GetBool("-automorph")
    heavy = itf.GetBool("-heavyonly")
    overlay = itf.GetBool("-overlay")

    ifs = oemolistream()
    if not ifs.open(rfname):
        OEThrow.Fatal("Unable to open %s for reading" % rfname)

    rmol = OEGraphMol()
    if not OEReadMolecule(ifs, rmol):
        OEThrow.Fatal("Unable to read reference molecule")

    # The stream variable is rebound to the file holding the molecules
    # that are compared against the reference.
    ifs = oemolistream()
    if not ifs.open(ifname):
        OEThrow.Fatal("Unable to open %s for reading" % ifname)

    ofs = oemolostream()
    write_output = itf.HasString("-out")
    if write_output:
        ofname = itf.GetString("-out")
        if not ofs.open(ofname):
            OEThrow.Fatal("Unable to open %s for writing" % ofname)
        if not overlay:
            OEThrow.Warning("Output is the same as input when overlay is false")

    for mol in ifs.GetOEMols():
        OEThrow.Info(mol.GetTitle())

        # One RMSD, one 3x3 rotation and one translation per conformer
        # index slot.
        nslots = mol.GetMaxConfIdx()
        rmsds = OEDoubleArray(nslots)
        rmtx = OEDoubleArray(9 * nslots)
        tmtx = OEDoubleArray(3 * nslots)

        # perform RMSD for all conformers
        OERMSD(rmol, mol, rmsds, automorph, heavy, overlay, rmtx, tmtx)

        for conf in mol.GetConfs():
            cidx = conf.GetIdx()
            OEThrow.Info("Conformer %i : rmsd = %f" % (cidx, rmsds[cidx]))

            if overlay:
                rot_at = cidx * 9
                trans_at = cidx * 3
                OERotate(conf, rmtx[rot_at: rot_at + 9])
                OETranslate(conf, tmtx[trans_at: trans_at + 3])

        if write_output:
            OEWriteMolecule(ofs, mol)

    return 0

#############################################################################

# Command-line definition handed to OEInterface(InterfaceData, argv) in main().
# "input/output options": -ref (required, keyless 1), -in (alias -i,
# required, keyless 2) and -out (alias -o, optional, keyless 3).
# "options": the booleans -automorph, -overlay and -heavyonly (all default
# true) and -verbose (alias -v, default false).
InterfaceData = """\
!BRIEF [options] [-ref <mol file>] [-in <mol file>] [-out <mol file>]

!CATEGORY "input/output options"

  !PARAMETER -ref
    !TYPE string
    !REQUIRED true
    !BRIEF input reference mol file name
    !KEYLESS 1
  !END

  !PARAMETER -in
    !ALIAS -i
    !TYPE string
    !REQUIRED true
    !BRIEF input mol file name
    !KEYLESS 2
  !END

  !PARAMETER -out
    !ALIAS -o
    !TYPE string
    !REQUIRED false
    !BRIEF output file name, this implies that -overlay should be true
    !KEYLESS 3
  !END

!END

!CATEGORY "options"

  !PARAMETER -automorph
    !TYPE bool
    !DEFAULT true
    !BRIEF assign best atom association
    !DETAIL
        If false, atoms are associated by order.
        If true, graph isomorphism is determined with symmetry perception.
  !END

  !PARAMETER -overlay
    !TYPE bool
    !DEFAULT true
    !BRIEF Minimize to the smallest RMSD
  !END

  !PARAMETER -heavyonly
    !TYPE bool
    !DEFAULT true
    !BRIEF Ignore hydrogens for RMSD calculation
  !END

  !PARAMETER -verbose
    !ALIAS -v
    !TYPE bool
    !DEFAULT false
    !BRIEF verbose
  !END

!END
"""

#############################################################################
# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Modifying SD tags

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Modifies the SD data of a set of input molecules by clearing all tags,
# defining which tags to keep or defining which tags to remove
#############################################################################
import sys
from openeye.oechem import *


def ClearProps(ifs, ofs):
    """Copy every molecule from ``ifs`` to ``ofs`` with all SD data removed.

    ifs -- input molecule stream
    ofs -- output molecule stream
    """
    for molecule in ifs.GetOEGraphMols():
        OEClearSDData(molecule)
        OEWriteMolecule(ofs, molecule)


def KeepProps(proplist, ifs, ofs):
    """Copy molecules from ``ifs`` to ``ofs`` keeping only the listed SD tags.

    proplist -- SD tag names that must survive
    ifs      -- input molecule stream
    ofs      -- output molecule stream
    """
    keep = set(proplist)  # constant-time membership tests
    for mol in ifs.GetOEGraphMols():
        # Collect the tags to drop first, so SD data is never deleted while
        # the SD data-pair iterator is still being walked.
        unwanted = []
        for dp in OEGetSDDataPairs(mol):
            tag = dp.GetTag()
            if tag not in keep and tag not in unwanted:
                unwanted.append(tag)
        for tag in unwanted:
            OEDeleteSDData(mol, tag)
        OEWriteMolecule(ofs, mol)


def RemoveProps(proplist, ifs, ofs):
    """Copy molecules from ``ifs`` to ``ofs`` with the listed SD tags deleted.

    proplist -- SD tag names to delete from every molecule
    ifs      -- input molecule stream
    ofs      -- output molecule stream
    """
    for molecule in ifs.GetOEGraphMols():
        for name in proplist:
            OEDeleteSDData(molecule, name)
        OEWriteMolecule(ofs, molecule)


def ModProps(itf, ifs, ofs):
    """Dispatch to the SD-data operation selected on the command line.

    The options are checked in the order ``-keep``, ``-remove``,
    ``-clearAll``; the first one that is set decides what is done.  If none
    is set, nothing is read or written.

    itf -- parsed command-line interface
    ifs -- input molecule stream
    ofs -- output molecule stream
    """
    if itf.HasString("-keep"):
        KeepProps(list(itf.GetStringList("-keep")), ifs, ofs)
        return
    if itf.HasString("-remove"):
        RemoveProps(list(itf.GetStringList("-remove")), ifs, ofs)
        return
    if itf.GetBool("-clearAll"):
        ClearProps(ifs, ofs)


def main(argv=[__name__]):
    """Command-line driver for modifying SD data.

    Requires exactly one of ``-keep``, ``-remove`` and ``-clearAll``, opens
    the input and output streams (both must use a format that supports SD
    data) and passes them to ``ModProps``.

    argv -- command-line arguments, program name first.
    """
    itf = OEInterface(InterfaceData, argv)

    # Count how many of the three mutually exclusive modes were requested.
    selected = (
        itf.HasString("-keep"),
        itf.HasString("-remove"),
        itf.GetBool("-clearAll"),
    )
    if sum(1 for flag in selected if flag) != 1:
        OEThrow.Usage("Need to pick one from -keep, -remove, or -clearAll")

    in_name = itf.GetString("-i")
    ifs = oemolistream()
    if not ifs.open(in_name):
        OEThrow.Fatal("Unable to open %s for reading" % in_name)
    if not OEIsSDDataFormat(ifs.GetFormat()):
        OEThrow.Fatal("Only works for input file formats that support SD data (sdf,oeb,csv)")

    out_name = itf.GetString("-o")
    ofs = oemolostream()
    if not ofs.open(out_name):
        OEThrow.Fatal("Unable to open %s for writing" % out_name)
    if not OEIsSDDataFormat(ofs.GetFormat()):
        OEThrow.Fatal("Only works for output file formats that support SD data (sdf,oeb,csv)")

    ModProps(itf, ifs, ofs)

# Command-line definition handed to OEInterface(InterfaceData, argv) in main().
# Declares:
#   -i        (alias -in,  string, required)  input file name
#   -o        (alias -out, string, required)  output file name
#   -keep     (alias -k,   string list)       SD tags to be kept
#   -remove   (alias -r,   string list)       SD tags to be removed
#   -clearAll (alias -c,   bool, default false) remove all SD tags
InterfaceData = """
!BRIEF [-remove] <prop1 prop2...> [-keep] <prop1 prop2...> [-clearAll] -i <input> -o <output>
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !REQUIRED true
  !BRIEF Output file name
  !END
!PARAMETER -keep
  !ALIAS -k
  !TYPE string
  !LIST true
  !BRIEF SD tags to be kept
  !END
!PARAMETER -remove
  !ALIAS -r
  !TYPE string
  !LIST true
  !BRIEF SD tags to be removed
  !END
!PARAMETER -clearAll
  !ALIAS -c
  !TYPE bool
  !DEFAULT false
  !BRIEF Removes all SD tags
  !END
!END
"""


# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Exporting SD data to a csv file

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Extract properties from SD file and save as CSV
#############################################################################
import sys
from openeye.oechem import *


def SDF2CSV(ifs, csv):
    """Write the title and SD data of every molecule in ``ifs`` as CSV.

    The input is read twice: once to collect all SD tags (kept in the
    order they are first seen) and, after rewinding, once to emit one row
    per molecule.  The first column is the molecule title; a molecule that
    lacks a tag gets an empty cell.  Fields containing commas, quotes or
    line breaks are quoted so the output stays a valid CSV file.

    ifs -- rewindable input molecule stream
    csv -- output stream; only its ``write`` method is used
    """
    # The ``csv`` parameter shadows the standard-library module of the same
    # name, so the module is imported locally under an alias.
    import csv as csvlib

    # read through once to find all unique tags
    taglist = []
    seen = set()
    for mol in ifs.GetOEGraphMols():
        for dp in OEGetSDDataPairs(mol):
            tag = dp.GetTag()
            if tag not in seen:
                seen.add(tag)
                taglist.append(tag)

    ifs.rewind()

    # '\n' keeps the line ending the plain-text version used to write.
    writer = csvlib.writer(csv, lineterminator='\n')

    # column labels
    writer.writerow(["Title"] + taglist)

    # one row per molecule
    for mol in ifs.GetOEGraphMols():
        row = [mol.GetTitle()]
        for tag in taglist:
            if OEHasSDData(mol, tag):
                row.append(OEGetSDData(mol, tag))
            else:
                row.append('')
        writer.writerow(row)


def main(argv=[__name__]):
    """Command-line driver: export SD data of ``<infile>`` to ``<csvfile>``.

    Only SDF and OEB input files are accepted.

    argv -- command-line arguments, program name first.
    """
    if len(argv) != 3:
        OEThrow.Usage("%s <infile> <csvfile>" % argv[0])
    mol_name, csv_name = argv[1], argv[2]

    ifs = oemolistream()
    if not ifs.open(mol_name):
        OEThrow.Fatal("Unable to open %s for reading" % mol_name)
    if ifs.GetFormat() not in (OEFormat_SDF, OEFormat_OEB):
        OEThrow.Fatal("Only works for sdf or oeb input files")

    csv = oeofstream()
    if not csv.open(csv_name):
        OEThrow.Fatal("Unable to open %s for writing" % csv_name)

    SDF2CSV(ifs, csv)


if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

Adding csv data as SD tags

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Merge a CSV file of data/properties, key on compound name in first column
# and use column titles as keys.  All data is read/written as strings
#############################################################################
import sys
import csv
from openeye.oechem import *


def CSV2SDF(csvfile, ifs, ofs):
    """Attach CSV columns as SD data to the molecules with matching titles.

    The first CSV row holds the column names; in every following row the
    first cell is the molecule title and the remaining cells are values for
    the corresponding columns.  Every molecule of ``ifs`` is written to
    ``ofs``; molecules whose title appears in the CSV get one SD tag per
    non-empty value.  All data is handled as strings.

    Blank CSV lines are ignored, an empty CSV file simply adds no data, and
    values beyond the last named column are dropped.

    csvfile -- iterable of CSV text lines (e.g. an open file)
    ifs     -- input molecule stream
    ofs     -- output molecule stream
    """
    reader = csv.reader(csvfile, delimiter=',')
    # An empty file yields no header at all; fall back to "no columns".
    propnames = next(reader, [])

    values = {}
    for row in reader:
        if not row:
            # csv.reader returns an empty list for a blank line.
            continue
        title = row[0]
        if title == "":
            OEThrow.Warning("Skipping entry with no title")
            continue
        values[title] = row[1:]

    # Column 0 is the title column, so data columns start at index 1.
    datanames = propnames[1:]
    for mol in ifs.GetOEGraphMols():
        # zip() stops at the shorter sequence, so a row that is longer than
        # the header cannot index past the column names.
        for name, v in zip(datanames, values.get(mol.GetTitle(), ())):
            if v != "":
                OESetSDData(mol, name, v)
        OEWriteMolecule(ofs, mol)


def main(argv=[__name__]):
    """Command-line driver: merge ``<csvfile>`` into ``<infile>`` as SD data.

    The result is written to ``<outsdfile>``, which must be an SDF or OEB
    file.  The CSV file is closed again even when a later step fails.

    argv -- command-line arguments, program name first.
    """
    if len(argv) != 4:
        OEThrow.Usage("%s <csvfile> <infile> <outsdfile>" % argv[0])

    # Only I/O failures mean "cannot open"; a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    try:
        csvfile = open(argv[1])
    except (IOError, OSError):
        OEThrow.Fatal("Unable to open %s csv for reading" % argv[1])

    try:
        ifs = oemolistream()
        if not ifs.open(argv[2]):
            OEThrow.Fatal("Unable to open %s for reading" % argv[2])

        ofs = oemolostream()
        if not ofs.open(argv[3]):
            OEThrow.Fatal("Unable to open %s for writing" % argv[3])

        if ofs.GetFormat() not in [OEFormat_SDF, OEFormat_OEB]:
            OEThrow.Fatal("Only works for sdf or oeb output files")

        CSV2SDF(csvfile, ifs, ofs)
    finally:
        csvfile.close()


if __name__ == '__main__':
    sys.exit(main(sys.argv))

See also

Renaming molecules by SD field

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Rename SDF molecules by specified field
#############################################################################
import sys
from openeye.oechem import *


def Rename(ifs, ofs, fieldname):
    """Retitle each molecule with the value of one of its SD fields.

    Every molecule of ``ifs`` is written to ``ofs``.  A molecule that has
    the SD field ``fieldname`` gets that value as its new title; otherwise
    a warning is issued and the molecule is written with its old title.

    ifs       -- input molecule stream
    ofs       -- output molecule stream
    fieldname -- name of the SD field that provides the new title
    """
    for mol in ifs.GetOEGraphMols():
        if not OEHasSDData(mol, fieldname):
            old_title = mol.GetTitle()
            OEThrow.Warning("Renaming of molecule %s failed; no field %s" % (old_title, fieldname))
        else:
            mol.SetTitle(OEGetSDData(mol, fieldname))
        OEWriteMolecule(ofs, mol)


def main(argv=[__name__]):
    """Command-line driver: rename molecules by an SD field.

    Expects ``<fieldname> <infile> <outfile>``.  The input file must use a
    format that supports SD data.

    argv -- command-line arguments, program name first.
    """
    if len(argv) != 4:
        OEThrow.Usage("%s <fieldname> <infile> <outfile>" % argv[0])
    fieldname, in_name, out_name = argv[1], argv[2], argv[3]

    ifs = oemolistream()
    if not ifs.open(in_name):
        OEThrow.Fatal("Unable to open %s for reading" % in_name)
    if not OEIsSDDataFormat(ifs.GetFormat()):
        OEThrow.Fatal("Only works for input file formats that support SD data (sdf,oeb,csv)")

    ofs = oemolostream()
    if not ofs.open(out_name):
        OEThrow.Fatal("Unable to open %s for writing" % out_name)

    Rename(ifs, ofs, fieldname)

if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Filter molecules by SD data

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Filter molecules by SD data
#############################################################################
import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Write the molecules whose numeric SD tag lies within [-min, -max].

    At least one of ``-min`` and ``-max`` must be given; a missing bound is
    treated as unbounded.  Molecules that lack the ``-tag`` field, or whose
    value cannot be converted to a float, are skipped with a warning.
    Both files must use a format that supports SD data.

    argv -- command-line arguments, program name first.
    """
    itf = OEInterface(InterfaceData, argv)
    if not (itf.HasDouble("-min") or itf.HasDouble("-max")):
        OEThrow.Fatal("Please set a filter value with -min or -max")

    ifs = oemolistream()
    if not ifs.open(itf.GetString("-i")):
        OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    if not OEIsSDDataFormat(ifs.GetFormat()):
        OEThrow.Fatal("Only works for input file formats that support SD data (sdf,oeb,csv)")

    ofs = oemolostream()
    if not ofs.open(itf.GetString("-o")):
        # Report the output file here; the message used to name the input.
        OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    if not OEIsSDDataFormat(ofs.GetFormat()):
        OEThrow.Fatal("Only works for output file formats that support SD data (sdf,oeb,csv)")

    tag = itf.GetString("-tag")

    # A bound that was not supplied defaults to +/- infinity, so it never
    # rejects a value.
    minval = float("-inf")
    if itf.HasDouble("-min"):
        minval = itf.GetDouble("-min")

    maxval = float("inf")
    if itf.HasDouble("-max"):
        maxval = itf.GetDouble("-max")

    for mol in ifs.GetOEGraphMols():
        if not OEHasSDData(mol, tag):
            OEThrow.Warning(
                "Unable to find %s tag on %s" % (tag, mol.GetTitle()))
            continue

        value = OEGetSDData(mol, tag)
        try:
            tagvalue = float(value)
        except ValueError:
            OEThrow.Warning("Failed to convert (%s) to a number in %s" %
                            (value, mol.GetTitle()))
            continue

        if tagvalue < minval:
            continue

        if tagvalue > maxval:
            continue

        OEWriteMolecule(ofs, mol)


# Command-line definition handed to OEInterface(InterfaceData, argv) in main().
# Declares:
#   -i   (string, required, keyless 1)  input file name
#   -o   (string, required, keyless 2)  output file name
#   -tag (string, required)             SD tag
#   -min (double, optional)             minimum value of SD tag
#   -max (double, optional)             maximum value of SD tag
InterfaceData = """
!BRIEF -i <input> -o <output> -tag <name> [-min <num>] [-max <num>]
!PARAMETER -i
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !TYPE string
  !REQUIRED true
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -tag
  !TYPE string
  !REQUIRED true
  !BRIEF SD tag
!END
!PARAMETER -min
  !TYPE double
  !REQUIRED false
  !BRIEF minimum value of SD tag
!END
!PARAMETER -max
  !TYPE double
  !REQUIRED false
  !BRIEF maximum value of SD tag
!END
"""

# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Counting molecules

#!/usr/bin/env python
#############################################################################
#  Copyright (C) 2010-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Counts molecules (and conformers) in input files
#############################################################################
from __future__ import print_function
import sys
from openeye.oechem import *


def PrintConfInfo(nconfs, nmols):
    """Print the conformer total and the average per molecule.

    nconfs -- total number of conformers
    nmols  -- number of molecules; when it is zero the average is
              reported as 0
    """
    average = float(nconfs) / nmols if nmols else 0
    print("Total # of conformers:  ", nconfs)
    print("Average # of conformers:", average)


def MolCount(ifs, fname, conffrag):
    """Count the molecules (and optionally conformers) of one input stream.

    Prints a one-line summary for the file and, when ``conffrag`` is true,
    the conformer statistics followed by a separator line.

    ifs      -- input molecule stream
    fname    -- file name, used only in the printed summary
    conffrag -- when true, conformers are counted as well
    Returns the tuple ``(number of molecules, number of conformers)``; the
    conformer count stays 0 when ``conffrag`` is false.
    """
    mol_total = 0
    conf_total = 0
    for molecule in ifs.GetOEMols():
        mol_total += 1
        if conffrag:
            conf_total += molecule.NumConfs()

    print("%s contains %d molecule(s)." % (fname, mol_total))

    if conffrag:
        PrintConfInfo(conf_total, mol_total)
        print("-----------------------------------------------------------")

    return mol_total, conf_total


def main(argv=[__name__]):
    """Command-line driver: count molecules in one or more input files.

    Each file given with ``-i`` is counted with ``MolCount``; files that
    cannot be opened are skipped with a warning.  Grand totals are printed
    at the end, including conformer statistics when ``-conf`` is set.

    argv -- command-line arguments, program name first.
    """
    itf = OEInterface(Interface, argv)
    conffrag = itf.GetBool("-conf")

    grand_mols = 0
    grand_confs = 0
    for fname in itf.GetStringList("-i"):
        ifs = oemolistream()
        if ifs.open(fname):
            file_mols, file_confs = MolCount(ifs, fname, conffrag)
            grand_mols += file_mols
            grand_confs += file_confs
        else:
            OEThrow.Warning("Unable to open %s for reading" % fname)

    print("===========================================================")
    print("Total %d molecules" % grand_mols)
    if conffrag:
        PrintConfInfo(grand_confs, grand_mols)


# Command-line definition handed to OEInterface(Interface, argv) in main().
# Declares:
#   -i    (alias -in, string list, required, keyless 1)  input file name(s)
#   -conf (alias -c,  bool, default false)               count conformers
Interface = """
!BRIEF [-conf] [-i] <infile1> [<infile2>...]
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !LIST true
  !REQUIRED true
  !BRIEF Input file name(s)
  !KEYLESS 1
!END
!PARAMETER -conf
  !ALIAS -c
  !TYPE bool
  !DEFAULT false
  !BRIEF Count conformers
!END
"""


# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Get molecule titles

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Output all molecule titles
#############################################################################
import sys
from openeye.oechem import *


def GenerateList(ifs, ofs):
    """Write the title of every molecule in ``ifs`` to ``ofs``, one per line.

    Molecules with an empty title are listed as "untitled".

    ifs -- input molecule stream
    ofs -- output object with a ``write`` method
    """
    for molecule in ifs.GetOEMols():
        name = molecule.GetTitle() or "untitled"
        ofs.write('%s\n' % name)


def main(argv=[__name__]):
    """Command-line driver: list the molecule titles of ``<infile>``.

    Titles go to ``<outfile>`` when it is given and to standard output
    otherwise.

    argv -- command-line arguments, program name first.
    """
    nargs = len(argv)
    if nargs < 2 or nargs > 3:
        OEThrow.Usage("%s <infile> [<outfile>]" % argv[0])

    ifs = oemolistream()
    if not ifs.open(argv[1]):
        OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # An output stream object is always created; without an output file
    # argument it is replaced by sys.stdout.
    ofs = oeofstream()
    if nargs != 3:
        ofs = sys.stdout
    elif not ofs.open(argv[2]):
        OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    GenerateList(ifs, ofs)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Find minimum path in a molecule

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2003-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Find the minimum path length between 2 smarts patterns
# or the path length between 2 named atoms
#############################################################################
from __future__ import print_function
import sys
from openeye.oechem import *


def AtomPathLength(ifs, ofs, itf, atm1, atm2):
    """Report the path between two named atoms in every molecule.

    Atom names are assigned with ``OETriposAtomNames``.  Molecules in which
    either name cannot be found are skipped with a warning.  The path
    length is printed when ``-verbose`` is set or no ``-o`` file was given;
    the atoms of the shortest path are extracted into a new molecule that
    is written to ``ofs`` when ``-o`` was given, or printed as SMILES when
    only ``-verbose`` is set.

    ifs  -- input molecule stream
    ofs  -- output molecule stream (used only when -o is set)
    itf  -- parsed command-line interface
    atm1 -- name of the first atom
    atm2 -- name of the second atom
    """
    verbose = itf.GetBool("-verbose")
    has_output = itf.HasString("-o")

    for mol in ifs.GetOEGraphMols():
        OETriposAtomNames(mol)

        # Look both atoms up in one pass; stop as soon as both are known.
        first = None
        second = None
        for atom in mol.GetAtoms():
            name = atom.GetName()
            if name == atm1:
                first = atom
            if name == atm2:
                second = atom
            if first is not None and second is not None:
                break

        if first is None or second is None:
            OEThrow.Warning("Failed to find atoms %s and %s in molecule" % (atm1, atm2))
            continue

        pathlen = OEGetPathLength(first, second)
        if verbose or not has_output:
            print("Path length: %s in %s" % (pathlen, OEMolToSmiles(mol)))

        # Copy just the atoms on the shortest path into a fresh molecule.
        path_atoms = OEShortestPath(first, second)
        pathmol = OEGraphMol()
        OESubsetMol(pathmol, mol, OEIsAtomMember(path_atoms), True)
        pathsmiles = OEMolToSmiles(pathmol)

        if has_output:
            OEWriteMolecule(ofs, pathmol)
        elif verbose:
            print(pathsmiles)


def SmartsPathLength(ifs, ofs, itf, ss1, ss2):
    """Report the shortest path between matches of two SMARTS patterns.

    For every molecule in ``ifs`` that both patterns match, all pairs of
    unique matches are examined and the atom pairs (one atom from each
    match) with the globally smallest path length are collected.  The
    shortest length is printed when ``-verbose`` is set or no ``-o`` file
    was given.  Each distinct shortest path (distinct by the SMILES of the
    extracted path) is written to ``ofs`` when ``-o`` was given, or printed
    as SMILES when only ``-verbose`` is set.

    ifs -- input molecule stream
    ofs -- output molecule stream (used only when -o is set)
    itf -- parsed command-line interface
    ss1 -- initialised substructure search for the first pattern
    ss2 -- initialised substructure search for the second pattern
    """
    for mol in ifs.GetOEGraphMols():
        OEPrepareSearch(mol, ss1)
        OEPrepareSearch(mol, ss2)
        # Both patterns must hit the molecule, otherwise it is skipped.
        if not (ss1.SingleMatch(mol) and ss2.SingleMatch(mol)):
            OEThrow.Warning("Unable to find SMARTS matches in %s, skipping"
                            % mol.GetTitle())
            continue

        unique = True
        # Smallest path length seen over all match pairs of this molecule.
        allminlen = sys.maxsize
        for match1 in ss1.Match(mol, unique):
            for match2 in ss2.Match(mol, unique):
                # Smallest path length within this particular match pair.
                minlen = sys.maxsize
                for atom1 in match1.GetTargetAtoms():
                    for atom2 in match2.GetTargetAtoms():
                        pathlen = OEGetPathLength(atom1, atom2)
                        if minlen > pathlen:
                            # Strictly shorter: restart the list of pairs.
                            minlen = pathlen
                            atompairs = []
                            atompairs.append([atom1, atom2])

                        elif minlen == pathlen:
                            # Tie: keep this pair as well.
                            atompairs.append([atom1, atom2])

                if minlen < allminlen:
                    # New global minimum: replace the collected pairs.
                    allminlen = minlen
                    allatompairs = atompairs[:]

                elif minlen == allminlen:
                    # Ties with the global minimum are accumulated.
                    allatompairs += atompairs[:]

        if itf.GetBool("-verbose") or not itf.HasString("-o"):
            print ("Shortest path length: %s in %s" % (allminlen, OEMolToSmiles(mol)))

        # SMILES of the paths already reported, used to suppress duplicates.
        spathlist = set()
        for satom1, satom2, in allatompairs:
            spath = OEShortestPath(satom1, satom2)
            spathmol = OEGraphMol()
            # Copy only the atoms that lie on the shortest path.
            OESubsetMol(spathmol, mol, OEIsAtomMember(spath))
            spathsmiles = OEMolToSmiles(spathmol)

            if spathsmiles in spathlist:
                continue
            spathlist.add(spathsmiles)

            if itf.HasString("-o"):
                OEWriteMolecule(ofs, spathmol)
            elif itf.GetBool("-verbose"):
                print (spathsmiles)

    return


def main(argv=[__name__]):
    """Command-line driver for the minimum-path example.

    Exactly one of the two modes must be fully specified: both SMARTS
    patterns (``-smarts1``/``-smarts2``) or both atom names
    (``-atom1``/``-atom2``).  The SMARTS mode runs ``SmartsPathLength``,
    the atom-name mode runs ``AtomPathLength``.

    argv -- command-line arguments, program name first.
    """
    itf = OEInterface(Interface, argv)

    by_smarts = itf.HasString("-smarts1") and itf.HasString("-smarts2")
    by_names = itf.HasString("-atom1") and itf.HasString("-atom2")
    # Both modes or neither mode being complete is an error.
    if by_smarts == by_names:
        OEThrow.Fatal("-smarts1 and -smarts2 or -atom1 and -atom2 must be set")

    in_name = itf.GetString("-i")
    ifs = oemolistream()
    if not ifs.open(in_name):
        OEThrow.Fatal("Unable to open %s for reading" %
                      in_name.rstrip())

    # The output stream is only opened when an output file was given.
    ofs = oemolostream()
    if itf.HasString("-o"):
        out_name = itf.GetString("-o")
        if not ofs.open(out_name):
            OEThrow.Fatal("Unable to open %s for writing" % out_name)

    if not by_smarts:
        AtomPathLength(ifs, ofs, itf, itf.GetString("-atom1"), itf.GetString("-atom2"))
        return

    smarts1 = itf.GetString("-smarts1")
    ss1 = OESubSearch()
    if not ss1.Init(smarts1):
        OEThrow.Fatal("Unable to parse SMARTS1: %s" % smarts1.rstrip())

    smarts2 = itf.GetString("-smarts2")
    ss2 = OESubSearch()
    if not ss2.Init(smarts2):
        OEThrow.Fatal("Unable to parse SMARTS2: %s" % smarts2.rstrip())

    SmartsPathLength(ifs, ofs, itf, ss1, ss2)

# Command-line definition handed to OEInterface(Interface, argv) in main().
# Declares:
#   -i       (string, required, keyless 1)  input file name
#   -o       (string, optional, keyless 2)  output file name
#   -smarts1 / -smarts2 (string)            SMARTS patterns for 1st/2nd atom
#   -atom1   / -atom2   (string)            names of the 1st/2nd atom
#   -verbose (bool, default false)          print verbose output
Interface = """
!BRIEF -i <input> [-o <output>] -smarts1 <smarts> -smarts2 <smarts> | -atom1 <name> -atom2 <name>
!PARAMETER -i
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !TYPE string
  !REQUIRED false
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -smarts1
  !TYPE string
  !BRIEF Smarts pattern to identify 1st atom
!END
!PARAMETER -smarts2
  !TYPE string
  !BRIEF Smarts pattern to identify 2nd atom
!END
!PARAMETER -atom1
  !TYPE string
  !BRIEF Name of 1st atom
!END
!PARAMETER -atom2
  !TYPE string
  !BRIEF Name of 2nd atom
!END
!PARAMETER -verbose
  !TYPE bool
  !REQUIRED false
  !DEFAULT false
  !BRIEF Print verbose output
!END
"""


# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

See also

Extract ring templates

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#############################################################################
# Extracts ring templates for 2D coordinate generation
#############################################################################

import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Command-line driver: extract ring templates from 2D molecules.

    Every ring template returned by ``OEExtractRingTemplates`` for the
    molecules of the input file is written to the output file, and the
    number of templates is reported.  Both files must use a format that
    can hold 2D coordinates.

    argv -- command-line arguments, program name first.
    """
    itf = OEInterface(InterfaceData, argv)

    # NOTE(review): the values are looked up through the aliases -in/-out
    # rather than the primary names -i/-o -- confirm that OEInterface
    # resolves aliases in GetString.
    src_name = itf.GetString("-in")
    dst_name = itf.GetString("-out")

    ifs = oemolistream()
    if not ifs.open(src_name):
        OEThrow.Fatal("Unable to open %s for reading" % src_name)
    if not OEIs2DFormat(ifs.GetFormat()):
        OEThrow.Fatal("Invalid input format: need 2D coordinates")

    ofs = oemolostream()
    if not ofs.open(dst_name):
        OEThrow.Fatal("Unable to open %s for writing" % dst_name)
    if not OEIs2DFormat(ofs.GetFormat()):
        OEThrow.Fatal("Invalid output format: unable to write 2D coordinates")

    extracted = 0
    for mol in ifs.GetOEGraphMols():
        for template in OEExtractRingTemplates(mol):
            OEWriteMolecule(ofs, template)
            extracted += 1

    OEThrow.Info("%d number of ring templates extracted" % extracted)


# Command-line definition handed to OEInterface(InterfaceData, argv) in main().
# Declares:
#   -i (alias -in,  string, required, keyless 1)  input file name
#   -o (alias -out, string, required, keyless 2)  output file name
InterfaceData = """
!BRIEF [-i] <input> [-o] <output>
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !REQUIRED true
  !BRIEF input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !TYPE string
  !REQUIRED true
  !BRIEF output file name
  !KEYLESS 2
!END
"""

# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Create 2D ring dictionary

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2015 OpenEye Scientific Software, Inc.
#############################################################################
# Creates a new 2D ring dictionary
#############################################################################

import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Build a new 2D ring dictionary from the rings of the input molecules.

    The input file name is read from "-in" and the dictionary file name from
    "-ringdict" (both declared in InterfaceData).  The input must use a
    format accepted by OEIs2DFormat and the output must be an OEB file;
    violations are reported through OEThrow.Fatal.

    Returns 0 after the dictionary has been written.
    """
    itf = OEInterface(InterfaceData, argv)

    ifname = itf.GetString("-in")
    ofname = itf.GetString("-ringdict")

    ifs = oemolistream()
    if not ifs.open(ifname):
        OEThrow.Fatal("Unable to open %s for reading!" % ifname)

    if not OEIs2DFormat(ifs.GetFormat()):
        OEThrow.Fatal("Invalid input file format for 2D coordinates!")

    # The output stream is opened only to verify that the target is writable
    # and that its format is OEB; nothing is written through it.
    ofs = oemolostream()
    if not ofs.open(ofname):
        OEThrow.Fatal("Unable to open %s for writing!" % ofname)

    if ofs.GetFormat() != OEFormat_OEB:
        OEThrow.Fatal("Output file has to be OEB format!")

    # Release the validation handle now: OEWrite2DRingDictionary below is
    # given the same file name, and a second handle left open on that path
    # would only be closed after the dictionary had been written.
    ofs.close()

    ringdict = OE2DRingDictionary()

    dots = OEDots(10000, 100, "molecules")

    # dots.Update() is called once per molecule read from the input.
    for mol in ifs.GetOEGraphMols():
        dots.Update()
        ringdict.AddRings(mol)

    # Finish the progress report, matching the "append" example.
    dots.Total()

    nrrings = ringdict.NumRings()
    OEThrow.Info("%d ring template(s) have been extracted!" % nrrings)

    OEWrite2DRingDictionary(ofname, ringdict)

    return 0


#############################################################################
# INTERFACE
#############################################################################

# Command-line definition handed to OEInterface in main().  It declares two
# required string parameters in one category: -in (alias -i) for the input
# 2D molecule file and -ringdict (alias -rd) for the output ring dictionary
# OEB file.
# NOTE(review): the !KEYLESS entries presumably let both values be given
# positionally, as the !BRIEF usage line suggests - confirm against the
# OEInterface documentation.
InterfaceData = """
!BRIEF [-in] <input> [-ringdict] <output ringdict>

!CATEGORY "input/output options :"

  !PARAMETER -in
    !ALIAS -i
    !TYPE string
    !REQUIRED true
    !KEYLESS 1
    !VISIBILITY simple
    !BRIEF Input 2D molecule filename
  !END

  !PARAMETER -ringdict
    !ALIAS -rd
    !TYPE string
    !REQUIRED true
    !KEYLESS 2
    !VISIBILITY simple
    !BRIEF Output ring dictionary OEB filename
  !END

!END
"""

# Script entry point: pass the process arguments to main() and use its
# return value as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Append to 2D ring dictionary

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2015, 2016 OpenEye Scientific Software, Inc.
#############################################################################
# Appends rings to an existing 2D ring dictionary
#############################################################################

import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Add the rings of the input molecules to an existing 2D ring dictionary.

    Reads three file names declared in InterfaceData: "-in" (molecules),
    "-inringdict" (dictionary to extend) and "-outringdict" (where the
    extended dictionary is written).  The molecule file must use a format
    accepted by OEIs2DFormat and the input dictionary must pass
    OEIsValid2DRingDictionary; violations are reported through
    OEThrow.Fatal.

    Returns 0 after the extended dictionary has been written.
    """
    itf = OEInterface(InterfaceData, argv)

    ifname = itf.GetString("-in")
    irdfname = itf.GetString("-inringdict")
    ordfname = itf.GetString("-outringdict")

    ifs = oemolistream()
    if not ifs.open(ifname):
        OEThrow.Fatal("Unable to open %s for reading!" % ifname)

    if not OEIs2DFormat(ifs.GetFormat()):
        OEThrow.Fatal("Invalid input file format for 2D coordinates!")

    if not OEIsValid2DRingDictionary(irdfname):
        OEThrow.Fatal("Invalid ring dictionary file!")

    ringdict = OE2DRingDictionary(irdfname)

    # Remember the starting size so only the newly added rings are reported.
    nrrings = ringdict.NumRings()

    dots = OEDots(10000, 100, "molecules")

    # dots.Update() is called once per molecule read from the input.
    for mol in ifs.GetOEGraphMols():
        dots.Update()
        ringdict.AddRings(mol)

    dots.Total()

    nrnewrings = ringdict.NumRings() - nrrings
    OEThrow.Info("%d new ring templates have been added!" % nrnewrings)

    OEWrite2DRingDictionary(ordfname, ringdict)

    return 0


#############################################################################
# INTERFACE
#############################################################################

# Command-line definition handed to OEInterface in main().  It declares three
# required string parameters in one category: -in (alias -i) for the input
# 2D molecule file, -inringdict (alias -ird) for the existing ring dictionary
# and -outringdict (alias -ord) for the dictionary file to write.
# NOTE(review): the !KEYLESS entries presumably let the three values be given
# positionally, as the !BRIEF usage line suggests - confirm against the
# OEInterface documentation.
InterfaceData = """
!BRIEF [-in] <input> [-inringdict] <input ringdict> [-outringdict] <output ringdict>

!CATEGORY "input/output options :"

  !PARAMETER -in
    !ALIAS -i
    !TYPE string
    !REQUIRED true
    !KEYLESS 1
    !VISIBILITY simple
    !BRIEF Input 2D molecule filename
  !END

  !PARAMETER -inringdict
    !ALIAS -ird
    !TYPE string
    !REQUIRED true
    !KEYLESS 2
    !VISIBILITY simple
    !BRIEF Input ring dictionary OEB filename
    !DETAIL
        2D ring dictionaries can be generated by the following OEChem examples:
        C++    - createringdict.cpp
        Python - createringdict.py
        Java   - CreateRingDict.java
        C#     - CreateRingDict.cs
  !END

  !PARAMETER -outringdict
    !ALIAS -ord
    !TYPE string
    !REQUIRED true
    !KEYLESS 3
    !VISIBILITY simple
    !BRIEF Output ring dictionary OEB filename
  !END

!END
"""


# Script entry point: pass the process arguments to main() and use its
# return value as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Generate 2D coordinates with user-defined ring templates

#!/usr/bin/env python
#############################################################################
# Copyright (C) 2015 OpenEye Scientific Software, Inc.
#############################################################################
# Generates 2D coordinates using user-defined ring templates
#############################################################################

import sys
from openeye.oechem import *


def main(argv=[__name__]):
    """Generate 2D coordinates for every input molecule and write them out.

    File names come from the "-in" and "-out" parameters declared in
    InterfaceData; the output must use a format accepted by OEIs2DFormat.
    When "-ringdict" is supplied and passes OEIsValid2DRingDictionary, it is
    handed to OEInit2DRingDictionary before any coordinates are generated;
    an invalid dictionary only produces a warning.

    Returns 0 once all molecules have been processed.
    """
    interface = OEInterface(InterfaceData, argv)

    in_path = interface.GetString("-in")
    out_path = interface.GetString("-out")

    reader = oemolistream()
    opened_for_reading = reader.open(in_path)
    if not opened_for_reading:
        OEThrow.Fatal("Unable to open %s for reading!" % in_path)

    writer = oemolostream()
    opened_for_writing = writer.open(out_path)
    if not opened_for_writing:
        OEThrow.Fatal("Unable to open %s for writing!" % out_path)

    out_format = writer.GetFormat()
    if not OEIs2DFormat(out_format):
        OEThrow.Fatal("Invalid output file format for 2D coordinates!")

    # The user-defined ring dictionary is optional.
    if interface.HasString("-ringdict"):
        dict_path = interface.GetString("-ringdict")

        if OEIsValid2DRingDictionary(dict_path):
            OEInit2DRingDictionary(dict_path)
        else:
            # Not fatal: processing continues without the dictionary.
            OEThrow.Warning("Invalid 2D ring dictionary file!")

    for molecule in reader.GetOEGraphMols():
        OEGenerate2DCoordinates(molecule)
        OEWriteMolecule(writer, molecule)

    return 0


#############################################################################
# INTERFACE
#############################################################################

# Command-line definition handed to OEInterface in main().  It declares two
# required string parameters, -in (alias -i) and -out (alias -o), plus the
# optional string parameter -ringdict (alias -rd) naming a ring dictionary
# file.
# NOTE(review): the !KEYLESS entries presumably let the input and output
# names be given positionally, as the !BRIEF usage line suggests - confirm
# against the OEInterface documentation.
InterfaceData = """
!BRIEF [-in] <input> [-out] <output> [-ringdict] <ringdict file>

!CATEGORY "input/output options :"

  !PARAMETER -in
    !ALIAS -i
    !TYPE string
    !REQUIRED true
    !KEYLESS 1
    !VISIBILITY simple
    !BRIEF Input filename
  !END

  !PARAMETER -out
    !ALIAS -o
    !TYPE string
    !REQUIRED true
    !KEYLESS 2
    !VISIBILITY simple
    !BRIEF Output filename
  !END

  !PARAMETER -ringdict
    !ALIAS -rd
    !TYPE string
    !REQUIRED false
    !VISIBILITY simple
    !BRIEF Ring dictionary file
    !DETAIL
        2D ring dictionaries can be generated by the following OEChem examples:
        C++    - createringdict.cpp
        Python - createringdict.py
        Java   - CreateRingDict.java
        C#     - CreateRingDict.cs
  !END

!END
"""

# Script entry point: pass the process arguments to main() and use its
# return value as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))