Creating an OECIFData Object

In this example, we will use the OECIFData class to get, add, delete, and set header data for an input CIF file. We will use the PDB code 5NEV, read using the mmCIF format, but any CIF header can be accessed with these APIs. First, the molecule is read and the OECIFData object is created. This object holds the header data for easy editing. To save any changes made to the header back onto the molecule, use the OESetMMCIFData function, as shown in the "Saving" step of the example below.

Read the data and generate the OECIFData object.

    ifs = oechem.oemolistream()
    ifs.open(filename)
    mol = oechem.OEMol()
    oechem.OEReadMolecule(ifs, mol)
    cifData = oechem.OECIFData(mol)

    print(cifData.GetNumCategories())  # 81
    cifCategory = cifData.GetCategory("_audit_conform.")
    print(cifCategory.GetCategoryName())  # "_audit_conform."

    # Getting Data
    for attribute in cifData.GetAttributes("_audit_author."):
        print(attribute)  # name, pdbx_ordinal, identifier_ORCID
    for attributeValue in cifData.GetAttributeValues("_audit_author.", "name"):
        print(attributeValue)  # Various author Names
    print(cifData.GetAttributeValues("_audit_author.", "name")[0])  # Coxon, C.R.
    print(cifData.GetAttributeValues("_audit_author.", "name", raw=True)[0])  # 'Coxon, C.R.'

    # Setting, Adding and Deleting Data
    # Pair Data
    cifData.SetData("_pdbx_audit_revision_details.", "details", "No details")
    cifData.AddData("_database_2.", "details")

    # Loop Data
    cifData.SetData("_entity_name_com.", "name", 0, "No Cell Division Protein Kinase 2,p33 protein kinase")
    cifData.AddRow("_audit_author.", ["Doe J.", "19", "0000-0000-0867-5309"])

    # Deleting a Category
    cifData.DeleteCategory("_pdbx_poly_seq_scheme.")
    print(cifData.GetNumCategories())  # 80

    # Saving
    oechem.OESetMMCIFData(mol, cifData)

    # Modifying the Molecule
    mol.SetTitle("mod5NEV")
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(res) == oechem.OEResidueIndex_HOH:
            continue
        if res.GetExtChainID() == "A" and res.GetEntityID() == "1":
            res.SetExtChainID("Z")
            res.SetSubChainID("Z")
            res.SetEntityID("9")
            oechem.OEAtomSetResidue(atom, res)

    # Update header with molecule changes
    copts = oechem.OECIFOptions()
    copts.SetPerceiveEntity(True)
    copts.SetPerceiveStruct(True)
    cifData.Update(mol, copts)
    print(cifData.GetAttributeValue("_entry.", "id"))  # mod5NEV
    struct_asym_id_idx = cifData.GetAttributeIndex("_struct_asym.", "id")
    struct_asym_entity_id_idx = cifData.GetAttributeIndex("_struct_asym.", "entity_id")
    struct_asym_row = cifData.GetRow("_struct_asym.", 9)
    print(struct_asym_row[struct_asym_id_idx])  # "Z"
    print(struct_asym_row[struct_asym_entity_id_idx])  # "9"

if __name__ == "__main__":
    sys.exit(main(sys.argv))

The OECIFData class allows access to all the data in a CIF header. Within a data block, each CIF header category is represented by its own OECIFCategory object, which can be accessed and modified individually.

    print(cifData.GetNumCategories())  # 81
    cifCategory = cifData.GetCategory("_audit_conform.")
    print(cifCategory.GetCategoryName())  # "_audit_conform."

    # Getting Data
    for attribute in cifData.GetAttributes("_audit_author."):
        print(attribute)  # name, pdbx_ordinal, identifier_ORCID
    for attributeValue in cifData.GetAttributeValues("_audit_author.", "name"):
        print(attributeValue)  # Various author Names
    print(cifData.GetAttributeValues("_audit_author.", "name")[0])  # Coxon, C.R.
    print(cifData.GetAttributeValues("_audit_author.", "name", raw=True)[0])  # 'Coxon, C.R.'

    # Setting, Adding and Deleting Data
    # Pair Data
    cifData.SetData("_pdbx_audit_revision_details.", "details", "No details")
    cifData.AddData("_database_2.", "details")

    # Loop Data
    cifData.SetData("_entity_name_com.", "name", 0, "No Cell Division Protein Kinase 2,p33 protein kinase")
    cifData.AddRow("_audit_author.", ["Doe J.", "19", "0000-0000-0867-5309"])

    # Deleting a Category
    cifData.DeleteCategory("_pdbx_poly_seq_scheme.")
    print(cifData.GetNumCategories())  # 80

    # Saving
    oechem.OESetMMCIFData(mol, cifData)

    # Modifying the Molecule
    mol.SetTitle("mod5NEV")
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(res) == oechem.OEResidueIndex_HOH:
            continue
        if res.GetExtChainID() == "A" and res.GetEntityID() == "1":
            res.SetExtChainID("Z")
            res.SetSubChainID("Z")
            res.SetEntityID("9")
            oechem.OEAtomSetResidue(atom, res)

    # Update header with molecule changes
    copts = oechem.OECIFOptions()
    copts.SetPerceiveEntity(True)
    copts.SetPerceiveStruct(True)
    cifData.Update(mol, copts)
    print(cifData.GetAttributeValue("_entry.", "id"))  # mod5NEV
    struct_asym_id_idx = cifData.GetAttributeIndex("_struct_asym.", "id")
    struct_asym_entity_id_idx = cifData.GetAttributeIndex("_struct_asym.", "entity_id")
    struct_asym_row = cifData.GetRow("_struct_asym.", 9)
    print(struct_asym_row[struct_asym_id_idx])  # "Z"
    print(struct_asym_row[struct_asym_entity_id_idx])  # "9"

if __name__ == "__main__":
    sys.exit(main(sys.argv))

For more information about OECIFData, OECIFCategory, and the CIF file format, see CIF File Format.

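Attribute data is accessed by first specifying the CIF category name; a specific data value in that category is then selected by its attribute name. In the following example, the category is "_audit_author.", and its attributes are name, pdbx_ordinal, and identifier_ORCID. As the output comments in the example show, requesting the values with raw=True returns them with their CIF quoting retained (for example, 'Coxon, C.R.' instead of Coxon, C.R.).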

    for attribute in cifData.GetAttributes("_audit_author."):
        print(attribute)  # name, pdbx_ordinal, identifier_ORCID
    for attributeValue in cifData.GetAttributeValues("_audit_author.", "name"):
        print(attributeValue)  # Various author Names
    print(cifData.GetAttributeValues("_audit_author.", "name")[0])  # Coxon, C.R.
    print(cifData.GetAttributeValues("_audit_author.", "name", raw=True)[0])  # 'Coxon, C.R.'

    # Setting, Adding and Deleting Data
    # Pair Data
    cifData.SetData("_pdbx_audit_revision_details.", "details", "No details")
    cifData.AddData("_database_2.", "details")

    # Loop Data
    cifData.SetData("_entity_name_com.", "name", 0, "No Cell Division Protein Kinase 2,p33 protein kinase")
    cifData.AddRow("_audit_author.", ["Doe J.", "19", "0000-0000-0867-5309"])

    # Deleting a Category
    cifData.DeleteCategory("_pdbx_poly_seq_scheme.")
    print(cifData.GetNumCategories())  # 80

    # Saving
    oechem.OESetMMCIFData(mol, cifData)

    # Modifying the Molecule
    mol.SetTitle("mod5NEV")
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(res) == oechem.OEResidueIndex_HOH:
            continue
        if res.GetExtChainID() == "A" and res.GetEntityID() == "1":
            res.SetExtChainID("Z")
            res.SetSubChainID("Z")
            res.SetEntityID("9")
            oechem.OEAtomSetResidue(atom, res)

    # Update header with molecule changes
    copts = oechem.OECIFOptions()
    copts.SetPerceiveEntity(True)
    copts.SetPerceiveStruct(True)
    cifData.Update(mol, copts)
    print(cifData.GetAttributeValue("_entry.", "id"))  # mod5NEV
    struct_asym_id_idx = cifData.GetAttributeIndex("_struct_asym.", "id")
    struct_asym_entity_id_idx = cifData.GetAttributeIndex("_struct_asym.", "entity_id")
    struct_asym_row = cifData.GetRow("_struct_asym.", 9)
    print(struct_asym_row[struct_asym_id_idx])  # "Z"
    print(struct_asym_row[struct_asym_entity_id_idx])  # "9"

if __name__ == "__main__":
    sys.exit(main(sys.argv))

Data items can be edited using the Set, Add, and Delete functions of OECIFData. For a category that holds multiple rows of data (i.e., CIF loop data), an attribute value on a specific row is addressed with a zero-based row index:

    # Pair Data
    cifData.SetData("_pdbx_audit_revision_details.", "details", "No details")
    cifData.AddData("_database_2.", "details")

    # Loop Data
    cifData.SetData("_entity_name_com.", "name", 0, "No Cell Division Protein Kinase 2,p33 protein kinase")
    cifData.AddRow("_audit_author.", ["Doe J.", "19", "0000-0000-0867-5309"])

    # Deleting a Category
    cifData.DeleteCategory("_pdbx_poly_seq_scheme.")
    print(cifData.GetNumCategories())  # 80

    # Saving
    oechem.OESetMMCIFData(mol, cifData)

    # Modifying the Molecule
    mol.SetTitle("mod5NEV")
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(res) == oechem.OEResidueIndex_HOH:
            continue
        if res.GetExtChainID() == "A" and res.GetEntityID() == "1":
            res.SetExtChainID("Z")
            res.SetSubChainID("Z")
            res.SetEntityID("9")
            oechem.OEAtomSetResidue(atom, res)

    # Update header with molecule changes
    copts = oechem.OECIFOptions()
    copts.SetPerceiveEntity(True)
    copts.SetPerceiveStruct(True)
    cifData.Update(mol, copts)
    print(cifData.GetAttributeValue("_entry.", "id"))  # mod5NEV
    struct_asym_id_idx = cifData.GetAttributeIndex("_struct_asym.", "id")
    struct_asym_entity_id_idx = cifData.GetAttributeIndex("_struct_asym.", "entity_id")
    struct_asym_row = cifData.GetRow("_struct_asym.", 9)
    print(struct_asym_row[struct_asym_id_idx])  # "Z"
    print(struct_asym_row[struct_asym_entity_id_idx])  # "9"

if __name__ == "__main__":
    sys.exit(main(sys.argv))

The OECIFData object can be used to update or synchronize header data with changes to the molecule. We can simulate this by first modifying a part of the molecule:

    mol.SetTitle("mod5NEV")
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(res) == oechem.OEResidueIndex_HOH:
            continue
        if res.GetExtChainID() == "A" and res.GetEntityID() == "1":
            res.SetExtChainID("Z")
            res.SetSubChainID("Z")
            res.SetEntityID("9")
            oechem.OEAtomSetResidue(atom, res)

    # Update header with molecule changes
    copts = oechem.OECIFOptions()
    copts.SetPerceiveEntity(True)
    copts.SetPerceiveStruct(True)
    cifData.Update(mol, copts)
    print(cifData.GetAttributeValue("_entry.", "id"))  # mod5NEV
    struct_asym_id_idx = cifData.GetAttributeIndex("_struct_asym.", "id")
    struct_asym_entity_id_idx = cifData.GetAttributeIndex("_struct_asym.", "entity_id")
    struct_asym_row = cifData.GetRow("_struct_asym.", 9)
    print(struct_asym_row[struct_asym_id_idx])  # "Z"
    print(struct_asym_row[struct_asym_entity_id_idx])  # "9"

if __name__ == "__main__":
    sys.exit(main(sys.argv))

Updating requires the user to specify which parts of the header are to be updated. This is done using the OECIFOptions class. A more detailed explanation of which data items are affected by perception can be found in the CIF File Format theory section. In the following example, we enable entity and struct perception, update the header, and then check that the title and residue changes made above have been applied to the pertinent header locations.

    copts = oechem.OECIFOptions()
    copts.SetPerceiveEntity(True)
    copts.SetPerceiveStruct(True)
    cifData.Update(mol, copts)
    print(cifData.GetAttributeValue("_entry.", "id"))  # mod5NEV
    struct_asym_id_idx = cifData.GetAttributeIndex("_struct_asym.", "id")
    struct_asym_entity_id_idx = cifData.GetAttributeIndex("_struct_asym.", "entity_id")
    struct_asym_row = cifData.GetRow("_struct_asym.", 9)
    print(struct_asym_row[struct_asym_id_idx])  # "Z"
    print(struct_asym_row[struct_asym_entity_id_idx])  # "9"

if __name__ == "__main__":
    sys.exit(main(sys.argv))