Extracting Biounits from PDB header remarks

Biounit (BU) extraction is one of the more basic functionalities offered through the Spruce TK. This example shows how to extract BUs using the PDB header remarks.

Command Line Interface

This example takes in a PDB file, and optionally the maximum number of atoms to be considered a BU as well as a boolean flag to instruct the code to prefer author or software generated headers (see OEExtractBioUnits for details on the API).

prompt> extract_biounits_remarks <extract protein PDB> [max atoms] [prefer author]

Code

Download code

extract_biounits_remarks.py and 4re6.pdb (the extraction PDB file)

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Simple superimposition of a fit protein on to a reference protein
#############################################################################
import sys
import os
from openeye import oechem
from openeye import oespruce
import tempfile


def ReadProteinMol(pdb_file, mol):
    ifs = oechem.oemolistream()
    ifs.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_SpruceDefault)
    ifs.SetFlavor(oechem.OEFormat_MMCIF, oechem.OEIFlavor_MMCIF_SpruceDefault)

    if not ifs.open(pdb_file):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % pdb_file)

    temp_mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, temp_mol):
        oechem.OEThrow.Fatal("Unable to read molecule from %s." % pdb_file)
    ifs.close()

    fact = oechem.OEAltLocationFactory(temp_mol)
    mol.Clear()
    fact.MakePrimaryAltMol(mol)
    return (mol)


def main(argv=[__name__]):
    if len(argv) not in [2, 4, 5]:
        oechem.OEThrow.Usage("%s <extract protein PDB> [max atoms] [prefer author] [nowrite]" % argv[0])  # noqa

    do_write = True
    if len(argv) == 5:
        if argv[4] != "nowrite":
            oechem.OEThrow.Warning("%s is not a valid option.\n" % argv[4])
            sys.exit(1)
        else:
            do_write = False

    opts = oespruce.OEBioUnitExtractionOptions()
    if len(argv) >= 4:
        opts.SetMaxAtoms(int(argv[2]))
        opts.SetPreferAuthorRecord(bool(argv[3]))

    extract_prot_file = argv[1]
    extract_prot = oechem.OEGraphMol()
    extract_success = ReadProteinMol(extract_prot_file, extract_prot)
    if not extract_success:
        oechem.OEThrow.Fatal("Unable to read protein(s) from PDB file.")

    # @ <SNIPPET-OEExtractBioUnits-REMARKS>
    biounits = oespruce.OEExtractBioUnits(extract_prot, opts)
    # @ </SNIPPET-OEExtractBioUnits-REMARKS>

    if do_write:
        pdb_ext = ".pdb"
        str_pos = extract_prot_file.find(pdb_ext)
        base_name = extract_prot_file[0:str_pos]
        temp_dir = tempfile.mkdtemp()

        for i, biounit in enumerate(biounits):
            output_biounit_file = os.path.join(temp_dir, base_name + "_BU_{}.oeb.gz".format(i))  # noqa
            print("Writing biounit {} to {}".format(i, output_biounit_file))
            ofs = oechem.oemolostream(output_biounit_file)
            oechem.OEWriteMolecule(ofs, biounit)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Example

prompt> extract_biounits_remarks.py 4re6.pdb

will generate the following output:

Writing biounit 0 to 4re6_BU_0.oeb.gz
Writing biounit 1 to 4re6_BU_1.oeb.gz