Appendix: Additional Examples in Python

These are full listings of the programming examples that are offered for download elsewhere in this chapter. See the guide to programming examples earlier in this chapter.

Listing 1: du2mmcif.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.


import sys
import os
from openeye import oechem
from openeye import oegrid
from openeye import oespruce


def main(argv=sys.argv):
    """
    Convert a design unit file into an mmCIF file.

    The design unit named by ``argv[1]`` is read, its components are copied
    into a single molecule, and that molecule is written to
    ``<input base name>.cif`` in the current working directory.

    Inputs:
        argv (list of str): Program name followed by exactly one argument,
        the path of the input design unit file.
    """
    # Exactly one positional argument is required; the previous "> 2" check
    # let a missing argument through and crashed on argv[1].
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <infile>" % argv[0])

    ifile = argv[1]
    # Strip whatever extension the input has instead of assuming it is
    # exactly five characters long (".oedu").
    ofile = os.path.splitext(os.path.basename(ifile))[0] + ".cif"

    du = oechem.OEDesignUnit()
    if not oechem.OEReadDesignUnit(ifile, du):
        oechem.OEThrow.Fatal("Cannot read design unit!")

    # Mask is All XOR PackingResidues, i.e. presumably every component
    # except the packing residues.
    component_mask = (
        oechem.OEDesignUnitComponents_All
        ^ oechem.OEDesignUnitComponents_PackingResidues
    )
    mol = oechem.OEGraphMol()
    du.GetComponents(mol, component_mask)
    du.GetPDBMetaData(mol)

    # Open the output only after the input was read successfully so that a
    # failed read does not leave an empty .cif file behind.
    ofs = oechem.oemolostream()
    if not ofs.open(ofile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % ofile)
    ofs.SetFlavor(oechem.OEFormat_MMCIF, oechem.OEOFlavor_MMCIF_Default)

    oechem.OEWriteMolecule(ofs, mol)
    ofs.close()

if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 2: add_receptor_obj_to_du.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys
from openeye import oechem, oedocking

# Names accepted by the -mask option that refer to predefined combinations
# of design unit components, mapped to the matching oechem constants.
COMBINED_DESIGNUNIT_CONSTANTS = {
    "All": oechem.OEDesignUnitComponents_All,
    "TargetComplex": oechem.OEDesignUnitComponents_TargetComplex,
    "TargetComplexNoSolvent": oechem.OEDesignUnitComponents_TargetComplexNoSolvent,
    "Default": oechem.OEDesignUnitComponents_Default,
    "ListComponents": oechem.OEDesignUnitComponents_ListComponents,
    "MacroMolComponents": oechem.OEDesignUnitComponents_MacroMolComponents,
    "MolComponents": oechem.OEDesignUnitComponents_MolComponents,
}

def make_receptor_with_custom_mask_and_predicate(du, receptor_out, mask, pred_str):
    """
    Make a receptor on a design unit using a target mask and a target
    predicate.

    Inputs:
        du: The design unit handed to oedocking.OEMakeReceptor (the caller
        in this script passes an oechem.OEDesignUnit).
        receptor_out (str): Path of the output receptor file. It is only
        used in the success message; nothing is written here.
        mask: Component mask handed to SetTargetMask.
        pred_str (str): Predicate string handed to SetTargetPred (the
        command-line help describes it as pipe separated residue entries).

    Returns:
        True if OEMakeReceptor succeeded, False otherwise.
    """
    receptor_opts = oedocking.OEMakeReceptorOptions()
    receptor_opts.SetTargetPred(pred_str)
    receptor_opts.SetTargetMask(mask)

    if not oedocking.OEMakeReceptor(du, receptor_opts):
        oechem.OEThrow.Warning(f"{du.GetTitle()} failed to make receptor")
        return False

    oechem.OEThrow.Info(f"Successfully created receptor: {receptor_out}")
    return True

def get_target_component_mask(component_string):
    """
    Translate one component name into its design unit component mask.

    Inputs:
        component_string (str): Either a key of
        COMBINED_DESIGNUNIT_CONSTANTS or a name understood by
        oechem.OEGetDesignUnitComponentID.

    Returns:
        The matching component constant / ID. A fatal error is reported
        when the name is unknown (ID 0).
    """
    if component_string in COMBINED_DESIGNUNIT_CONSTANTS:
        return COMBINED_DESIGNUNIT_CONSTANTS[component_string]

    component_id = oechem.OEGetDesignUnitComponentID(component_string)
    if component_id == 0:
        # The error handler is oechem.OEThrow (as used everywhere else in
        # this script); "oechem.Throw" raised AttributeError instead of
        # reporting the problem.
        oechem.OEThrow.Fatal(f"{component_string} is not a OEDesignUnit Component")

    return component_id

def main(argv=[__name__]):
    """
    Add solvent or cofactor atoms/molecules to a receptor docking grid.

    Reads the design unit given by -du, builds a target mask from the
    comma separated component names given by -mask, makes a receptor with
    that mask and the -pred predicate, and writes the design unit to -out
    when the receptor could be made.
    """
    itf = oechem.OEInterface(InterfaceData, argv)
    infile = itf.GetString("-du")
    receptor_out = itf.GetString("-out")
    mask_arg = itf.GetString("-mask")
    pred_str = itf.GetString("-pred")

    if not os.path.exists(infile):
        oechem.OEThrow.Fatal(f"{infile} does not exist.")

    du = oechem.OEDesignUnit()
    if not oechem.OEReadDesignUnit(infile, du):
        oechem.OEThrow.Fatal("Cannot read design unit!")

    if not oechem.OEIsWriteableDesignUnit(receptor_out):
        oechem.OEThrow.Fatal(f"Can not write design unit to {receptor_out}")

    # str.split never returns an empty list, so blank entries (empty -mask,
    # trailing or doubled commas) are dropped explicitly; otherwise the
    # fallback below could never be reached.
    component_names = [name.strip() for name in mask_arg.split(",")]
    component_names = [name for name in component_names if name]

    if not component_names:
        processed_mask = oechem.OEDesignUnitComponents_TargetComplexNoSolvent
    else:
        # OR the masks of all requested components together.
        processed_mask = get_target_component_mask(component_names[0])
        for name in component_names[1:]:
            processed_mask |= get_target_component_mask(name)

    success = make_receptor_with_custom_mask_and_predicate(du, receptor_out, processed_mask, pred_str)

    if success:
        if not oechem.OEWriteDesignUnit(receptor_out, du):
            oechem.OEThrow.Fatal(f"Unable to write design unit to {receptor_out}")

# Command-line interface definition consumed by oechem.OEInterface in main().
InterfaceData = """
!BRIEF [To run] python add_receptor_obj_to_du.py -du <oedu> 

!PARAMETER -du
    !TYPE string
    !REQUIRED true
    !BRIEF Input OEDesignUnit file.
    !DETAIL
        Input OEDesignUnit (.oedu) file containing the prepared protein structure.
!END

!PARAMETER -out
    !TYPE string
    !REQUIRED false
    !DEFAULT receptor_out.oedu
    !BRIEF Output receptor file.
    !DETAIL
        Output receptor file (.oedu).
!END

!PARAMETER -mask
    !TYPE string
    !REQUIRED false
    !DEFAULT TargetComplexNoSolvent
    !BRIEF Mask string containing the component names for the receptor mask. Components should be separated by commas.
    !DETAIL
        Mask string containing the component names for the receptor mask. Components should be separated by commas.
        Example: "protein,ligand,cofactors". Options to choose from: protein, nucleic, ligand, cofactors, solvent, 
        metals, counter_ions, lipids, packing_residues, excipients, sugars, polymers, post_translational, other_proteins, 
        other_nucleics, other_ligands, other_cofactors, undefined, All, TargetComplex, TargetComplexNoSolvent, Default,
        ListComponents, MacroMolComponents, and MolComponents.
!END

!PARAMETER -pred
    !TYPE string
    !REQUIRED false
    !BRIEF Predicate string containing residue IDs, solvent or cofactor molecules to create a subset
    !DETAIL
        Predicate string containing solvent and cofactor molecules with their residue IDs. Accepted format is, 
        "MN:508: :D:1:|HOH:435: :D:1:"
        where, if needed, empty space is filled with alt loc.
!END

"""

if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 3: extract_biounits_ref.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Extract biological units from a protein using a reference protein
#############################################################################
import sys
import os
from openeye import oechem
from openeye import oespruce
import tempfile


def ReadProteinMol(pdb_file, mol):
    """
    Read the first molecule from pdb_file and build its primary
    alternate-location form into mol.

    The input stream uses the Spruce default flavors for both PDB and
    mmCIF. Failing to open the file or to read a molecule is reported via
    OEThrow.Fatal. The passed-in mol is cleared, filled by
    OEAltLocationFactory.MakePrimaryAltMol, and returned.
    """
    stream = oechem.oemolistream()
    input_flavors = (
        (oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_SpruceDefault),
        (oechem.OEFormat_MMCIF, oechem.OEIFlavor_MMCIF_SpruceDefault),
    )
    for file_format, flavor in input_flavors:
        stream.SetFlavor(file_format, flavor)

    if not stream.open(pdb_file):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % pdb_file)

    raw_mol = oechem.OEGraphMol()
    read_ok = oechem.OEReadMolecule(stream, raw_mol)
    if not read_ok:
        oechem.OEThrow.Fatal("Unable to read molecule from %s." % pdb_file)
    stream.close()

    alt_factory = oechem.OEAltLocationFactory(raw_mol)
    mol.Clear()
    alt_factory.MakePrimaryAltMol(mol)
    return mol


def _parse_bool_arg(text):
    """Interpret a command-line token as a boolean ("false", "0", "no", "off" and "" are False)."""
    return text.strip().lower() not in ("", "0", "false", "no", "off")


def _structure_base_name(path):
    """Return the file name of path without its directory, ".gz" suffix and extension."""
    name = os.path.basename(path)
    if name.lower().endswith(".gz"):
        name = name[:-3]
    return os.path.splitext(name)[0]


def main(argv=[__name__]):
    """
    Extract biological units from a protein, guided by a reference protein.

    Inputs:
        argv (list of str): Program name followed by
        <extract protein PDB> <reference protein PDB>
        [min score] [superpose] [nowrite].
        "min score" and "superpose" must be given together; "nowrite"
        suppresses writing the biounits.

    Unless "nowrite" is given, each biounit is written to
    <base name>_BU_<i>.oeb.gz inside a freshly created temporary directory.
    """
    if len(argv) not in [3, 5, 6]:
        oechem.OEThrow.Usage("%s <extract protein PDB> <reference protein PDB> [min score] [superpose] [nowrite]" % argv[0])  # noqa

    do_write = True
    if len(argv) == 6:
        if argv[5] != "nowrite":
            oechem.OEThrow.Warning("%s is not a valid option.\n" % argv[5])
            sys.exit(1)
        do_write = False

    opts = oespruce.OEBioUnitExtractionOptions()
    if len(argv) >= 5:
        opts.SetMinScore(int(argv[3]))
        # bool("false") is True, so the flag is parsed explicitly.
        opts.SetSuperpose(_parse_bool_arg(argv[4]))

    extract_prot_file = argv[1]
    extract_prot = oechem.OEGraphMol()
    extract_success = ReadProteinMol(extract_prot_file, extract_prot)
    if not extract_success:
        oechem.OEThrow.Fatal("Unable to extract protein(s) from PDB file.")

    ref_prot_file = argv[2]
    ref_prot = oechem.OEGraphMol()
    ref_success = ReadProteinMol(ref_prot_file, ref_prot)
    if not ref_success:
        oechem.OEThrow.Fatal("Unable to reference protein(s) from PDB file.")

    biounits = oespruce.OEExtractBioUnits(extract_prot, ref_prot, opts)

    if do_write:
        # Use only the file name: a directory part (or an absolute path)
        # would defeat os.path.join with the temporary directory, and
        # str.find(".pdb") returned -1 for non-.pdb input such as .cif.
        base_name = _structure_base_name(extract_prot_file)
        temp_dir = tempfile.mkdtemp()

        for i, biounit in enumerate(biounits):
            output_biounit_file = os.path.join(temp_dir, base_name + "_BU_{}.oeb.gz".format(i))  # noqa
            print("Writing biounit {} to {}".format(i, output_biounit_file))
            ofs = oechem.oemolostream()
            if not ofs.open(output_biounit_file):
                oechem.OEThrow.Warning("Unable to open %s for writing." % output_biounit_file)
                continue
            oechem.OEWriteMolecule(ofs, biounit)
            ofs.close()


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 4: extract_biounits_remarks.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Extract biological units from a protein structure file
#############################################################################
import sys
import os
from openeye import oechem
from openeye import oespruce
import tempfile


def ReadProteinMol(pdb_file, mol):
    """
    Read the first molecule from pdb_file and build its primary
    alternate-location form into mol.

    The input stream uses the Spruce default flavors for both PDB and
    mmCIF. Failing to open the file or to read a molecule is reported via
    OEThrow.Fatal. The passed-in mol is cleared, filled by
    OEAltLocationFactory.MakePrimaryAltMol, and returned.
    """
    stream = oechem.oemolistream()
    input_flavors = (
        (oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_SpruceDefault),
        (oechem.OEFormat_MMCIF, oechem.OEIFlavor_MMCIF_SpruceDefault),
    )
    for file_format, flavor in input_flavors:
        stream.SetFlavor(file_format, flavor)

    if not stream.open(pdb_file):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % pdb_file)

    raw_mol = oechem.OEGraphMol()
    read_ok = oechem.OEReadMolecule(stream, raw_mol)
    if not read_ok:
        oechem.OEThrow.Fatal("Unable to read molecule from %s." % pdb_file)
    stream.close()

    alt_factory = oechem.OEAltLocationFactory(raw_mol)
    mol.Clear()
    alt_factory.MakePrimaryAltMol(mol)
    return mol


def _parse_bool_arg(text):
    """Interpret a command-line token as a boolean ("false", "0", "no", "off" and "" are False)."""
    return text.strip().lower() not in ("", "0", "false", "no", "off")


def _structure_base_name(path):
    """Return the file name of path without its directory, ".gz" suffix and extension."""
    name = os.path.basename(path)
    if name.lower().endswith(".gz"):
        name = name[:-3]
    return os.path.splitext(name)[0]


def main(argv=[__name__]):
    """
    Extract biological units from a protein structure file.

    Inputs:
        argv (list of str): Program name followed by
        <extract protein PDB> [max atoms] [prefer author] [nowrite].
        "max atoms" and "prefer author" must be given together; "nowrite"
        suppresses writing the biounits.

    Unless "nowrite" is given, each biounit is written to
    <base name>_BU_<i>.oeb.gz inside a freshly created temporary directory.
    """
    if len(argv) not in [2, 4, 5]:
        oechem.OEThrow.Usage("%s <extract protein PDB> [max atoms] [prefer author] [nowrite]" % argv[0])  # noqa

    do_write = True
    if len(argv) == 5:
        if argv[4] != "nowrite":
            oechem.OEThrow.Warning("%s is not a valid option.\n" % argv[4])
            sys.exit(1)
        do_write = False

    opts = oespruce.OEBioUnitExtractionOptions()
    if len(argv) >= 4:
        opts.SetMaxAtoms(int(argv[2]))
        # bool("false") is True, so the flag is parsed explicitly.
        opts.SetPreferAuthorRecord(_parse_bool_arg(argv[3]))

    extract_prot_file = argv[1]
    extract_prot = oechem.OEGraphMol()
    extract_success = ReadProteinMol(extract_prot_file, extract_prot)
    if not extract_success:
        oechem.OEThrow.Fatal("Unable to read protein(s) from PDB file.")

    biounits = oespruce.OEExtractBioUnits(extract_prot, opts)

    if do_write:
        # Use only the file name: a directory part (or an absolute path)
        # would defeat os.path.join with the temporary directory, and
        # str.find(".pdb") returned -1 for non-.pdb input such as .cif.
        base_name = _structure_base_name(extract_prot_file)
        temp_dir = tempfile.mkdtemp()

        for i, biounit in enumerate(biounits):
            output_biounit_file = os.path.join(temp_dir, base_name + "_BU_{}.oeb.gz".format(i))  # noqa
            print("Writing biounit {} to {}".format(i, output_biounit_file))
            ofs = oechem.oemolostream()
            if not ofs.open(output_biounit_file):
                oechem.OEThrow.Warning("Unable to open %s for writing." % output_biounit_file)
                continue
            oechem.OEWriteMolecule(ofs, biounit)
            ofs.close()


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 5: findpockets.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################################################
# This program demonstrates how to find pockets and some of its properties from a protein or DesignUnit file.
#############################################################################################################
import sys
from openeye import oechem
from openeye import oespruce


def readProteinMol(ifilename):
    """
    Read a design unit or a molecule from ifilename.

    Files whose extension is 'oedu' are read as an OEDesignUnit; anything
    else is read as the first molecule of the file. Open or read failures
    are reported via OEThrow.Fatal.

    Returns:
        The OEDesignUnit or OEGraphMol that was read.
    """
    if oechem.OEGetFileExtension(ifilename) == 'oedu':
        du = oechem.OEDesignUnit()
        if not oechem.OEReadDesignUnit(ifilename, du):
            oechem.OEThrow.Fatal("Unable to open %s for reading OEDesignUnit" % ifilename)
        return du

    ifs = oechem.oemolistream()
    if not ifs.open(ifilename):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifilename)
    mol = oechem.OEGraphMol()
    # Do not hand an empty molecule to the caller when nothing could be read.
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal("Unable to read molecule from %s" % ifilename)
    ifs.close()
    return mol


def main(argv=None):
    """
    Find pockets in a protein or design unit and print some of their
    properties.

    Inputs:
        argv (list of str): Program name followed by the input file. When
        omitted, sys.argv is used.

    For every pocket the residue count, the residues and the surface area
    are printed.
    """
    # The original always read sys.argv, ignoring the argv it was given;
    # fall back to sys.argv only when no argument list is passed in.
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <protein or DesignUnit input file>" % argv[0])

    mol = readProteinMol(argv[1])

    pockets = oespruce.OEFindPockets(mol)
    print("pockets count: %s" % len(list(pockets)))
    # Counting consumed the iterator; rewind before the real pass.
    pockets.ToFirst()
    for pocket_cntr, pocket in enumerate(pockets, start=1):
        pocket_residues = pocket.GetResidues()
        print("pocket_%s Residues count: " % pocket_cntr, len(list(pocket_residues)))
        pocket_residues.ToFirst()
        print("pocket_%s Residues: " % pocket_cntr)
        for res in pocket_residues:
            print(res)
        print("pocket_%s Surface Area: " % pocket_cntr,  pocket.GetSurfaceArea())

if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 6: make_apo_design_units.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Script to prepare proteins into design units
#############################################################################
import sys
import os
from openeye import oechem
from openeye import oegrid
from openeye import oespruce


def _structure_base_name(path):
    """Return the file name of path without its directory, ".gz" suffix and extension."""
    name = os.path.basename(path)
    if name.lower().endswith(".gz"):
        name = name[:-3]
    return os.path.splitext(name)[0]


def main(argv=sys.argv):
    """
    Prepare design units from a protein file using a site residue.

    Inputs:
        argv (list of str): Program name followed by
        <infile> <site_residue> [<mtzfile>] [<loopdbfile>].
        With a single optional argument, it is taken as the MTZ file when
        its name contains "mtz" and as the loop database otherwise.

    Each design unit is written to <input base name>_DU_<i>.oedu in the
    current working directory.
    """
    if len(argv) < 3 or len(argv) > 5:
        oechem.OEThrow.Usage(
            "%s <infile> <site_residue> [<mtzfile>] [<loopdbfile>]" % argv[0]
        )

    ifs = oechem.oemolistream()
    ifile = argv[1]
    if not ifs.open(ifile):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifile)

    site_residue = argv[2]

    # Sort the optional arguments into electron density and loop database.
    edfile = None
    loopfile = None
    extra_args = argv[3:]
    if len(extra_args) == 2:
        edfile, loopfile = extra_args
    elif len(extra_args) == 1:
        if "mtz" in extra_args[0]:
            edfile = extra_args[0]
        else:
            loopfile = extra_args[0]

    ed = None
    if edfile is not None:
        ed = oegrid.OESkewGrid()
        if not oegrid.OEReadMTZ(edfile, ed, oegrid.OEMTZMapType_Fwt):
            oechem.OEThrow.Fatal(
                "Unable to read electron density file %s" % edfile
            )  # noqa

    if ifs.GetFormat() not in [oechem.OEFormat_PDB, oechem.OEFormat_CIF]:
        oechem.OEThrow.Fatal("Only works for .pdb or .cif input files")

    ifs.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_SpruceDefault)
    ifs.SetFlavor(oechem.OEFormat_MMCIF, oechem.OEIFlavor_MMCIF_SpruceDefault)

    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal("Unable to read molecule from %s" % ifile)

    metadata = oespruce.OEStructureMetadata()
    opts = oespruce.OEMakeDesignUnitOptions()
    loop_opts = opts.GetPrepOptions().GetBuildOptions().GetLoopBuilderOptions()
    loop_opts.SetBuildTails(False)
    if loopfile is not None:
        loop_opts.SetLoopDBFilename(loopfile)

    if ed is not None:
        design_units = oespruce.OEMakeDesignUnits(mol, ed, metadata, opts, site_residue)
    else:
        design_units = oespruce.OEMakeDesignUnits(mol, metadata, opts, site_residue)

    # Strip the real extension rather than a fixed four characters, and
    # avoid str.format on a template that contains the file name (braces
    # in the name would break it).
    base_name = _structure_base_name(ifile)
    for i, design_unit in enumerate(design_units):
        ofile = "%s_DU_%d.oedu" % (base_name, i)
        if not oechem.OEWriteDesignUnit(ofile, design_unit):
            oechem.OEThrow.Warning("Unable to write design unit to %s" % ofile)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 7: make_design_unit_using_metadata.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.


'''
This script prepares an MD or docking-ready design unit with receptor from an input PDB or mmCIF file.

Input parameters:
    Required:
        -in: The input PDB or mmCIF file
        -site_residue: Defines a binding site residue for pocket detection (Required for apo structures only)

    Optional:
        -map: Input mtz file containing the electron density map
        -loop_db: Input loop_db file for loop modeling
        -generate_tautomers: generate and use tautomers in the hydrogen network optimization
        -prefix: String to prepend to all output DU files
        -metadata: Input metadata JSON file
        -allow_filter_error: Run spruce prep even when structure fails spruce filter
        -verbose: boolean flag to trigger verbose logging

Usage examples:
python spruce_prep.py -in 3fly.cif
python spruce_prep.py -in 3fly.pdb -metadata 3fly_metadata.json -generate_tautomers false
python spruce_prep.py -in 3fly.pdb -map 3fly.mtz -verbose true -prefix 3FLY -loop_db my_loop_db.loop_db
python spruce_prep.py -in 3p2q.pdb -site_residue 'HIS: 76: :A'
'''


import sys
from openeye import oechem
from openeye import oegrid
from openeye import oespruce
from openeye import oedocking


# Command-line interface definition consumed by oechem.OEInterface in main().
InterfaceData = '''
!PARAMETER -in
    !TYPE string
    !REQUIRED true
    !BRIEF Input PDB/CIF file name
!END

!PARAMETER -site_residue
    !TYPE string
    !REQUIRED false
    !BRIEF Site residue specification to identify binding site (ex: 'HIS:42: :A')
!END

!PARAMETER -prefix
    !TYPE string
    !REQUIRED false
    !BRIEF prefix to prepend to all output DU file names
!END

!PARAMETER -map
    !TYPE string
    !REQUIRED false
    !LEGAL_VALUE *.mtz
    !BRIEF Input electron density file 
!END

!PARAMETER -loop_db
    !TYPE string
    !REQUIRED false
    !LEGAL_VALUE *.loop_db
    !BRIEF Input database for loop modeling
!END

!PARAMETER -generate_tautomers
    !TYPE bool
    !REQUIRED false
    !DEFAULT true
    !BRIEF Option to generate and use tautomers in the hydrogen network optimization (optional)
!END

!PARAMETER -metadata
    !TYPE string
    !REQUIRED false
    !LEGAL_VALUE *.json
    !BRIEF Input structure metadata json file
!END

!PARAMETER -allow_filter_error
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !BRIEF Option to allow running spruce prep even when structure fails spruce filter.
!END

!PARAMETER -verbose
    !TYPE bool
    !REQUIRED false
    !DEFAULT false
    !BRIEF Boolean flag to trigger verbose logging
!END
'''


def main(argv=sys.argv):
    """
    Prepare design units with receptors from a PDB or mmCIF file.

    The command line is parsed with InterfaceData. The structure is read,
    run through the Spruce filter, turned into design units (optionally
    using an electron density map, a loop database, structure metadata
    and a site residue), and each design unit is validated, given a
    receptor and written to an .oedu file named after its title.

    Inputs:
        argv (list of str): Command-line arguments including the program
        name.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    # read input parameters
    ifile = itf.GetString('-in')

    include_ed = False
    if itf.HasString('-map'):
        mapfile = itf.GetString('-map')
        include_ed = True

    # The parameter is declared as "-loop_db" in InterfaceData; asking for
    # "-loopdb" meant a supplied loop database was never picked up.
    include_loop = False
    if itf.HasString('-loop_db'):
        loopfile = itf.GetString('-loop_db')
        include_loop = True

    site_residue_specified = False
    if itf.HasString('-site_residue'):
        site_residue = itf.GetString('-site_residue')
        site_residue_specified = True

    has_prefix = False
    if itf.HasString('-prefix'):
        prefix = itf.GetString('-prefix')
        has_prefix = True

    if itf.GetBool('-verbose'):
        oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Verbose)

    has_metadata = False
    if itf.HasString('-metadata'):
        metadata_json_name = itf.GetString('-metadata')
        with open(metadata_json_name, "r") as f:
            metadata_json = f.read()
        has_metadata = True

    allow_filter_error = itf.GetBool('-allow_filter_error')

    generate_tautomers = itf.GetBool('-generate_tautomers')

    # read PDB or CIF input file
    ifs = oechem.oemolistream()
    if not ifs.open(ifile):
        oechem.OEThrow.Fatal(f'Unable to open {ifile} for reading')

    if ifs.GetFormat() not in [oechem.OEFormat_PDB, oechem.OEFormat_CIF]:
        oechem.OEThrow.Fatal('Input file must be .pdb or .cif')

    ifs.SetFlavor(oechem.OEFormat_PDB,
                  oechem.OEIFlavor_PDB_Default |
                  oechem.OEIFlavor_PDB_DATA |
                  oechem.OEIFlavor_PDB_ALTLOC)

    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal(f'Unable to read molecule from {ifile}')

    # read mtz file if included; the (possibly empty) grid is always
    # handed to the filter below
    ed = oegrid.OESkewGrid()
    if include_ed:
        if not oegrid.OEReadMTZ(mapfile, ed, oegrid.OEMTZMapType_Fwt):
            oechem.OEThrow.Fatal(f'Unable to read electron density file {mapfile}')

    makedu_opts = oespruce.OEMakeDesignUnitOptions()

    split_opts = makedu_opts.GetSplitOptions()
    split_opts.SetAlternateLocationHandling(oespruce.OEAlternateLocationOption_Combinatorial)
    split_opts.SetMinLigAtoms(8)
    split_opts.SetMaxLigAtoms(200)
    split_opts.SetMaxLigResidues(20)

    makedu_opts.GetPrepOptions().GetEnumerateSitesOptions().SetEnumerateCofactorSites(False)
    makedu_opts.GetPrepOptions().GetEnumerateSitesOptions().SetCollapseNonSiteAlts(False)

    # read metadata file if provided
    metadata = oespruce.OEStructureMetadata()
    if has_metadata:
        oespruce.OEStructureMetadataFromJson(metadata, metadata_json)

    # set loop database if included
    if include_loop:
        makedu_opts.GetPrepOptions().GetBuildOptions().GetLoopBuilderOptions().SetLoopDBFilename(loopfile)

    # set tautomer generation flag
    makedu_opts.GetPrepOptions().GetProtonateOptions().SetGenerateTautomers(generate_tautomers)

    # run Spruce filter (local renamed so it no longer shadows the builtin)
    filter_opts = oespruce.OESpruceFilterOptions()
    spruce_filter = oespruce.OESpruceFilter(filter_opts, makedu_opts)
    ret_filter = spruce_filter.StandardizeAndFilter(mol, ed, metadata)

    if ret_filter != oespruce.OESpruceFilterIssueCodes_Success:
        oechem.OEThrow.Warning('This structure fails spruce filter due to: ')
        oechem.OEThrow.Warning(spruce_filter.GetMessages())
        if not allow_filter_error:
            oechem.OEThrow.Fatal('This structure fails spruce filter')

    # make the DUs
    if site_residue_specified:
        # use site residue
        if include_ed:
            design_units = oespruce.OEMakeDesignUnits(mol, ed, metadata, makedu_opts, site_residue)
        else:
            design_units = oespruce.OEMakeDesignUnits(mol, metadata, makedu_opts, site_residue)
    else:
        # assume structure has bound ligand
        if include_ed:
            design_units = oespruce.OEMakeDesignUnits(mol, ed, metadata, makedu_opts)
        else:
            design_units = oespruce.OEMakeDesignUnits(mol, metadata, makedu_opts)

    # validate the DUs
    validator = oespruce.OEValidateDesignUnit()
    for design_unit in design_units:
        ret_validator = validator.Validate(design_unit, metadata)

        if ret_validator != oespruce.OEDesignUnitIssueCodes_Success:
            oechem.OEThrow.Warning(f'Design unit {design_unit.GetTitle()} did not pass the DU validator.')
            oechem.OEThrow.Warning(validator.GetMessages())

        # make the receptor
        ropts = oedocking.OEMakeReceptorOptions()
        if not oedocking.OEMakeReceptor(design_unit, ropts):
            oechem.OEThrow.Warning(f'Unable to generate receptor for design unit {design_unit.GetTitle()}')

        # write the DU
        print(design_unit.GetTitle())
        # Build a file-system friendly name from the title. The final [:-1]
        # drops the last character -- presumably the "_" produced from a
        # closing ")" at the end of the title; TODO confirm against real
        # design unit titles.
        basename = f'{design_unit.GetTitle()}'.replace('(', '_').replace(')', '_').replace(' > ', 'DU_').replace(' ', '_').replace('/', '-')[:-1]
        if has_prefix:
            ofile = f'{prefix}_{basename}.oedu'
        else:
            ofile = f'{basename}.oedu'

        if not oechem.OEWriteDesignUnit(ofile, design_unit):
            oechem.OEThrow.Warning(f'Unable to write design unit {design_unit.GetTitle()}')


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 8: make_design_units.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Script to prepare proteins into design units
#############################################################################
import sys
import os
from openeye import oechem
from openeye import oegrid
from openeye import oespruce


def main(argv=sys.argv):
    """Prepare a protein structure file into design units, one file per unit.

    Usage: ``<infile> [<mtzfile>] [<loopdbfile>]``

    With one optional argument, it is treated as the electron density (MTZ)
    file when its name contains "mtz" and as the loop database otherwise.
    With two optional arguments the first is the MTZ file and the second the
    loop database.

    The structure is read with the Spruce default input flavors, run through
    ``OESpruceFilter.StandardizeAndFilter`` (a failure is fatal unless
    ``allow_filter_errors`` below is switched on) and handed to
    ``OEMakeDesignUnits``.  Each resulting design unit is validated and
    written to ``<input stem>_DU_<index>.oedu`` relative to the current
    working directory; validation and write problems are reported as
    warnings so the remaining design units are still processed.
    """
    if len(argv) < 2 or len(argv) > 4:
        oechem.OEThrow.Usage("%s <infile> [<mtzfile>] [<loopdbfile>]" % argv[0])

    ifs = oechem.oemolistream()
    ifile = argv[1]
    if not ifs.open(ifile):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifile)

    # Sort the optional arguments into their roles.
    edfile = None
    loopfile = None
    if len(argv) == 4:
        edfile, loopfile = argv[2], argv[3]
    elif len(argv) == 3:
        if "mtz" in argv[2]:
            edfile = argv[2]
        else:
            loopfile = argv[2]
    include_ed = edfile is not None
    include_loop = loopfile is not None

    # The grid is always created: the filter call below takes it even when
    # no density file was supplied.
    ed = oegrid.OESkewGrid()
    if include_ed and not oegrid.OEReadMTZ(edfile, ed, oegrid.OEMTZMapType_Fwt):
        oechem.OEThrow.Fatal("Unable to read electron density file %s" % edfile)

    if ifs.GetFormat() not in [oechem.OEFormat_PDB, oechem.OEFormat_CIF]:
        oechem.OEThrow.Fatal("Only works for .pdb or .cif input files")

    ifs.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_SpruceDefault)
    ifs.SetFlavor(oechem.OEFormat_MMCIF, oechem.OEIFlavor_MMCIF_SpruceDefault)

    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal("Unable to read molecule from %s" % ifile)

    allow_filter_errors = False
    metadata = oespruce.OEStructureMetadata()
    filter_opts = oespruce.OESpruceFilterOptions()
    makedu_opts = oespruce.OEMakeDesignUnitOptions()
    loop_opts = makedu_opts.GetPrepOptions().GetBuildOptions().GetLoopBuilderOptions()
    loop_opts.SetBuildTails(False)
    if include_loop:
        loop_opts.SetLoopDBFilename(loopfile)

    # Named spruce_filter so the builtin filter() is not shadowed.
    spruce_filter = oespruce.OESpruceFilter(filter_opts, makedu_opts)
    ret_filter = spruce_filter.StandardizeAndFilter(mol, ed, metadata)
    if ret_filter != oespruce.OESpruceFilterIssueCodes_Success:
        oechem.OEThrow.Warning("This structure fails spruce filter due to: ")
        oechem.OEThrow.Warning(spruce_filter.GetMessages())
        if not allow_filter_errors:
            oechem.OEThrow.Fatal("This structure fails spruce filter")

    if include_ed:
        design_units = oespruce.OEMakeDesignUnits(mol, ed, metadata, makedu_opts)
    else:
        design_units = oespruce.OEMakeDesignUnits(mol, metadata, makedu_opts)

    validator = oespruce.OEValidateDesignUnit()

    # Derive the output stem from the real extension instead of assuming it
    # is exactly four characters long; a trailing ".gz" is dropped first so
    # "x.pdb.gz" yields "x" rather than "x.pd".
    stem = os.path.basename(ifile)
    if stem.lower().endswith(".gz"):
        stem = stem[:-3]
    stem = os.path.splitext(stem)[0]

    for i, design_unit in enumerate(design_units):
        ret_validator = validator.Validate(design_unit, metadata)

        if ret_validator != oespruce.OEDesignUnitIssueCodes_Success:
            oechem.OEThrow.Warning("This generated DU did not pass DU validator.")
            oechem.OEThrow.Warning(validator.GetMessages())

        ofile = "%s_DU_%d.oedu" % (stem, i)
        if not oechem.OEWriteDesignUnit(ofile, design_unit):
            oechem.OEThrow.Warning("Unable to write design unit to %s" % ofile)


# Script entry point: run main() with the process arguments and hand its
# return value to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 9: fasta_to_structmeta.py full listing.

#!/usr/bin/env python
# (C) 2024 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.


import sys
from openeye import oechem
from openeye import oespruce

def main(argv=sys.argv):
    """Convert the sequences of a FASTA file into structure-metadata JSON.

    Usage: ``<FASTA> <ChainIDs> <output>``

    ``<ChainIDs>`` is a comma-separated list; the n-th chain ID is assigned
    to the n-th sequence read from the FASTA file.  Each sequence is stored
    as its residue names joined with "-".  The JSON produced by
    ``OEStructureMetadataToJson`` is written to ``<output>``.

    A fatal error is reported when an input or output file cannot be opened
    or when the FASTA file holds more sequences than chain IDs were given.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <FASTA> <ChainIDs> <output>" % argv[0])

    fasta_input = argv[1]
    chainIDs = argv[2].split(',')
    oname = argv[3]

    ifs = oechem.oemolistream()
    if not ifs.open(fasta_input):
        oechem.OEThrow.Fatal(f'Unable to open {fasta_input} for reading')

    fastaMol = oechem.OEGraphMol()

    # NOTE(review): oechem.oeout is kept from the original listing; confirm
    # that opening it on a file is the intended way to create the output.
    ofs = oechem.oeout
    if not ofs.open(oname):
        oechem.OEThrow.Fatal(f'Unable to open {oname} for writing')

    StructMeta = oespruce.OEStructureMetadata()
    SeqMeta = oespruce.OESequenceMetadata()
    i = 0

    while oechem.OEReadFASTAFile(ifs, fastaMol):
        # Check before indexing so a surplus sequence gives the intended
        # message instead of an IndexError.
        if i >= len(chainIDs):
            oechem.OEThrow.Fatal('More sequences are in the fasta file than the ChainIDs provided,')

        # Build the residue list afresh for every record; otherwise each
        # chain would also carry the residues of all preceding chains.
        # Presumably OEReadFASTAFile replaces the molecule contents on each
        # read -- TODO confirm.
        seq_list = [res.GetName() for res in oechem.OEGetResidues(fastaMol)]

        SeqMeta.SetChainID(chainIDs[i])
        SeqMeta.SetSequence("-".join(seq_list))
        StructMeta.AddSequenceMetadata(SeqMeta)
        i += 1

    ofs.write(oespruce.OEStructureMetadataToJson(StructMeta))


# Script entry point: run main() with the process arguments and hand its
# return value to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 10: superpose.py full listing.

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Simple superimposition of a fit protein on to a reference protein
#############################################################################
import sys
import os
from openeye import oechem
from openeye import oespruce
import tempfile


def ReadProteinMol(ifilename):
    """Load *ifilename* as a design unit when possible, else as a molecule.

    Returns an ``OEDesignUnit`` when ``OEIsReadableDesignUnit`` accepts the
    file, otherwise an ``OEGraphMol`` read through a molecule stream that
    uses the Spruce default PDB and mmCIF input flavors.  Open and read
    failures are reported through ``OEThrow.Fatal``.
    """
    if not oechem.OEIsReadableDesignUnit(ifilename):
        stream = oechem.oemolistream()
        # Flavors are registered before the stream is opened.
        stream.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_SpruceDefault)
        stream.SetFlavor(oechem.OEFormat_MMCIF, oechem.OEIFlavor_MMCIF_SpruceDefault)
        if not stream.open(ifilename):
            oechem.OEThrow.Fatal("Unable to open %s for reading" % ifilename)

        protein = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(stream, protein):
            oechem.OEThrow.Fatal("Unable to read molecule from %s" % ifilename)
        return protein

    design_unit = oechem.OEDesignUnit()
    if not oechem.OEReadDesignUnit(ifilename, design_unit):
        oechem.OEThrow.Fatal("Unable to open %s for reading OEDesignUnit" % ifilename)
    return design_unit


def ReadSiteResidues(in_file):
    """Return the non-blank lines of the text file *in_file* as a list.

    Lines that are empty or contain only whitespace are dropped.  Kept
    lines are returned without their line terminator but are otherwise
    unmodified (leading and trailing spaces are preserved).
    """
    with open(in_file, "r") as handle:
        rows = handle.read().splitlines()
    return [row for row in rows if row.strip() != ""]


def _output_stem(path):
    """Return the file name of *path* without directories or format extension."""
    stem = os.path.basename(path)
    # Drop a compression suffix first so "x.pdb.gz" yields "x".
    if stem.lower().endswith(".gz"):
        stem = stem[:-3]
    return os.path.splitext(stem)[0]


def main(argv=[__name__]):
    """Superpose a fit protein onto a reference protein and report the result.

    Usage: ``<reference protein> <fit protein>
    [global|ddm|weighted|sse|site] [site-residue file] [nowrite]``

    The method defaults to "global".  The "site" method requires a text file
    of site residues as the next argument; it is read only when the
    reference is not a design unit.  A trailing ``nowrite`` suppresses
    output of the transformed fit protein.

    The Tanimoto value is printed for the SSE method, RMSD and sequence
    score otherwise.  Unless suppressed, the transformed fit protein is
    written into a fresh temporary directory as ``<fit stem>_sp.oedu`` (design
    unit input) or ``<fit stem>_sp.oeb.gz`` (molecule input), and the path is
    printed.  Invalid arguments are reported as warnings followed by exit
    status 1.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage(f"{argv[0]} <reference protein PDB> <fit protein PDB> [global|ddm|weighted|sse|site] [site-residue file] [nowrite]")  # noqa

    inp_method = "global"
    if len(argv) > 3:
        inp_method = argv[3]

    site_file = None
    do_write = True
    if inp_method == "site":
        if len(argv) > 4:
            site_file = argv[4]
        else:
            oechem.OEThrow.Warning("A text file containing site residues must be provided for using the SiteSequence method\n")
            sys.exit(1)

        if not os.path.isfile(site_file):
            oechem.OEThrow.Warning(f"File not found: {site_file}\n")
            sys.exit(1)
        # The site file occupies argv[4], pushing "nowrite" one slot back.
        nowrite_argidx = 5
    else:
        nowrite_argidx = 4

    if len(argv) > nowrite_argidx:
        if argv[nowrite_argidx] != "nowrite":
            oechem.OEThrow.Warning(f"{argv[nowrite_argidx]} is not a valid option.\n")
            sys.exit(1)
        do_write = False

    ref_prot_file = argv[1]
    fit_prot_file = argv[2]

    ref_prot = ReadProteinMol(ref_prot_file)
    fit_prot = ReadProteinMol(fit_prot_file)

    method = oespruce.OEGetSuperposeMethodFromName(inp_method)
    if method == oespruce.OESuperposeMethod_Undefined:
        oechem.OEThrow.Warning(f"{inp_method} superposition method is not supported.\n")
        sys.exit(1)

    opts = oespruce.OESuperposeOptions(method)
    print(f"Superposing {fit_prot_file} to {ref_prot_file} using {oespruce.OEGetSuperposeMethodName(method)}.\n")

    results = oespruce.OESuperposeResults()
    superposition = oespruce.OESuperpose(opts)
    if opts.GetMethod() == oespruce.OESuperposeMethod_Site and not isinstance(ref_prot, oechem.OEDesignUnit):
        site_residues = ReadSiteResidues(site_file)
        superposition.SetupRef(ref_prot, site_residues)
    else:
        superposition.SetupRef(ref_prot)
    superposition.Superpose(results, fit_prot)

    rmsd = results.GetRMSD()
    seqscore = results.GetSeqScore()
    tanimoto = results.GetTanimoto()

    results.Transform(fit_prot)

    if opts.GetMethod() == oespruce.OESuperposeMethod_SSE:
        print(f"Tanimoto: {tanimoto:4.2f}\n")
    else:
        print(f"RMSD: {rmsd:4.2f} Angstroms.")
        print(f"SeqScore: {seqscore:d}.\n")

    if do_write:
        temp_dir = tempfile.mkdtemp()
        # Use only the bare file stem: directory parts of the input path must
        # not leak into the temp-dir join, and the extension is not assumed
        # to be ".pdb"/".oedu" (str.find returning -1 used to chop the name).
        base_name = _output_stem(fit_prot_file)
        if isinstance(fit_prot, oechem.OEDesignUnit):
            output_fit_file = os.path.join(temp_dir, base_name + "_sp.oedu")
            ofs = oechem.oeofstream()
            if not ofs.open(output_fit_file):
                oechem.OEThrow.Fatal("Unable to open %s for writing" % output_fit_file)
            if not oechem.OEWriteDesignUnit(ofs, fit_prot):
                oechem.OEThrow.Fatal("Unable to write design unit to %s" % output_fit_file)
        else:
            output_fit_file = os.path.join(temp_dir, base_name + "_sp.oeb.gz")
            ofs = oechem.oemolostream()
            if not ofs.open(output_fit_file):
                oechem.OEThrow.Fatal("Unable to open %s for writing" % output_fit_file)
            oechem.OEWriteMolecule(ofs, fit_prot)
        # Close explicitly so the file is complete before its path is reported.
        ofs.close()

        print(f"Superimposed fit protein was written to {output_fit_file}.\n")


# Script entry point: run main() with the process arguments and hand its
# return value to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

Listing 11: validate_design_unit.py full listing.

#!/usr/bin/env python
# (C) 2025 Cadence Design Systems, Inc. (Cadence)
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import os
import sys
from openeye import oechem, oespruce


def main(argv=[__name__]):
    """Validate a design unit file and print the outcome.

    ``argv[1]`` is the path of the ``.oedu`` file to check.  When it is
    missing, a usage hint is printed and the process exits with status 1.
    A nonexistent or unreadable file is reported through ``OEThrow.Fatal``.

    The design unit is passed to ``OEValidateDesignUnit.Validate``; either a
    success line or the validator's messages are printed.
    """
    # Honor the argv parameter rather than reading sys.argv directly, so the
    # function behaves the same when called with an explicit argument list.
    if len(argv) < 2:
        print("Input needed:\npython validate_designunit.py <input.oedu>")
        sys.exit(1)

    ifilename = argv[1]

    if not os.path.exists(ifilename):
        oechem.OEThrow.Fatal(f"{ifilename} does not exist.")

    du = oechem.OEDesignUnit()
    if not oechem.OEReadDesignUnit(ifilename, du):
        oechem.OEThrow.Fatal("Cannot read design unit!")

    validator = oespruce.OEValidateDesignUnit()
    issue_code = validator.Validate(du)

    if issue_code == oespruce.OEDesignUnitIssueCodes_Success:
        print(f"Validation successful for '{ifilename}'")
    else:
        print(f"Validation raised the following warning(s) for '{ifilename}'")
        print(validator.GetMessages())


# Script entry point: run main() with the process arguments and hand its
# return value to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))