Counting molecules

A program that counts the molecules, and optionally the conformers, in one or more input molecule files. The -conf flag turns on conformer counting; the -confomega flag also counts conformers, but applies the Omega conformer test when reading each file. The output is the molecule count for each file, followed by the total over all input files. If conformers are counted, the conformer count and the average conformer count per molecule are also output, both per file and for the total.

Example

prompt> molcount.py -conf input1.oeb.gz input2.oeb.gz

Code

Download code

molcount.py

#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence) 
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.

#############################################################################
# Counts molecules (and conformers) in input files
#############################################################################
import sys
from openeye import oechem


def PrintConfInfo(nconfs, nmols):
    """Print a conformer total and the mean number of conformers per molecule.

    Args:
        nconfs: Total number of conformers counted.
        nmols: Number of molecules the conformers belong to.

    The average is reported as 0 when ``nmols`` is zero (or otherwise
    falsy), which avoids a division by zero for empty inputs.
    """
    print("Total # of conformers:  ", nconfs)
    # Guard the division: an empty input has no meaningful average.
    mean_confs = float(nconfs) / nmols if nmols else 0
    print("Average # of conformers:", mean_confs)


def MolCount(ifs, fname, conffrag):
    """Count the molecules (and optionally conformers) read from one stream.

    Args:
        ifs: An opened input stream exposing ``GetOEMols()``.
        fname: Name reported in the per-file summary line.
        conffrag: When true, conformers are counted as well and a conformer
            summary plus a separator line are printed after the molecule
            count.

    Returns:
        A ``(molecule_count, conformer_count)`` tuple; the conformer count
        stays 0 when ``conffrag`` is false.
    """
    mol_total = 0
    conf_total = 0
    # enumerate() keeps the running molecule count; it stays 0 for an
    # empty stream because the loop body never executes.
    for mol_total, mol in enumerate(ifs.GetOEMols(), start=1):
        if conffrag:
            conf_total += mol.NumConfs()

    print("%s contains %d molecule(s)." % (fname, mol_total))

    if not conffrag:
        return mol_total, conf_total

    PrintConfInfo(conf_total, mol_total)
    print("-----------------------------------------------------------")
    return mol_total, conf_total


def main(argv=None):
    """Count molecules, and optionally conformers, in the given input files.

    Args:
        argv: Command-line argument list parsed against the module-level
            ``Interface`` definition. Defaults to ``[__name__]`` when
            omitted.

    For every file listed under ``-i`` that can be opened, the per-file
    counts are printed by ``MolCount`` and accumulated; files that cannot
    be opened produce a warning and are skipped. A grand total is printed
    at the end, including conformer statistics when ``-conf`` or
    ``-confomega`` was given.
    """
    # Use None instead of a mutable list as the default argument value.
    if argv is None:
        argv = [__name__]

    itf = oechem.OEInterface(Interface, argv)
    conffrag = itf.GetBool("-conf")
    confomega = itf.GetBool("-confomega")
    # -confomega implies conformer counting. Decide this once, before the
    # loop, so the final summary reports conformers even when no input file
    # could be opened (the same behavior as plain -conf).
    conffrag = conffrag or confomega

    totmols = 0
    totconfs = 0
    for fname in itf.GetStringList("-i"):
        ifs = oechem.oemolistream()
        if not ifs.open(fname):
            oechem.OEThrow.Warning("Unable to open %s for reading" % fname)
            continue
        if confomega:
            # Read this stream with the Omega conformer test installed.
            # NOTE(review): the meaning of the False argument is not visible
            # here -- confirm against the OEOmegaConfTest documentation.
            ifs.SetConfTest(oechem.OEOmegaConfTest(False))
        nummol, numconfs = MolCount(ifs, fname, conffrag)
        totmols += nummol
        totconfs += numconfs

    print("===========================================================")
    print("Total %d molecules" % totmols)
    if conffrag:
        PrintConfInfo(totconfs, totmols)


# Command-line interface definition handed to oechem.OEInterface in main().
# It declares three parameters:
#   -i          required, list-valued string parameter (alias -in) holding the
#               input file name(s); it is keyless, so file names may be given
#               without the flag.
#   -conf       boolean flag (alias -c, default false) that turns on
#               conformer counting.
#   -confomega  boolean flag (default false) that counts Omega conformers.
Interface = """
!BRIEF [-conf | -confomega] [-i] <infile1> [<infile2>...]
!PARAMETER -i
  !ALIAS -in
  !TYPE string
  !LIST true
  !REQUIRED true
  !BRIEF Input file name(s)
  !KEYLESS 1
!END
!PARAMETER -conf
  !ALIAS -c
  !TYPE bool
  !DEFAULT false
  !BRIEF Count conformers
!END
!PARAMETER -confomega
  !TYPE bool
  !DEFAULT false
  !BRIEF Count Omega conformers
!END
"""


# Script entry point: when run directly (not imported), hand the full command
# line to main() and pass its return value to sys.exit().
if __name__ == "__main__":
    sys.exit(main(sys.argv))