OEMedChem Examples

Bemis Murcko perception

/*****************************************************************************
 * Copyright (C) 2014 OpenEye Scientific Software, Inc.
 *****************************************************************************
 * Utility to fragment the input structures by Bemis-Murcko rules
 * ---------------------------------------------------------------------------
 * BemisMurckoPerception input_mols output_mols [uncolor]
 *
 * input_mols: filename of molecules to fragment and uncolor
 * output_mols: filename of input structure with SDData of perceived regions
 * [uncolor]: optional 4th arg to request uncoloring of output fragment info
 *****************************************************************************/
package openeye.examples.oemedchem;

import openeye.oechem.*;
import openeye.oemedchem.*;

public class BemisMurckoPerception {
    public static void main(String[] args) {
        OEInterface itf = new OEInterface(interfaceData, "BemisMurckoPerception", args);

        // flag on command line indicates uncoloring option or not;
        boolean bUncolor = itf.GetBool("-uncolor");

        // input structure(s) to transform;
        oemolistream ifsmols = new oemolistream();
        if (!ifsmols.open(itf.GetString("-i")))
            oechem.OEThrow.Fatal("Unable to open file for reading: " + itf.GetString("-i"));

        // save output structure(s) to this file;
        oemolostream ofs = new oemolostream();
        if (!ofs.open(itf.GetString("-o")))
            oechem.OEThrow.Fatal("Unable to open file for writing: " + itf.GetString("-o"));
        if (!oechem.OEIsSDDataFormat(ofs.GetFormat()))
            oechem.OEThrow.Fatal("Output file format does not support SD data: " + itf.GetString("-o"));

        int irec = 0;
        int ototal = 0;
        OEGraphMol mol = new OEGraphMol();
        while (oechem.OEReadMolecule(ifsmols,mol)) {
            ++irec;
            oechem.OETheFunctionFormerlyKnownAsStripSalts(mol);

            int regions = 0;
            OEGraphMol frag = new OEGraphMol();
            for (OEAtomBondSet abset : oemedchem.OEGetBemisMurcko(mol)) {
                ++regions;
                // create a fragment from the perceived region;
                oechem.OESubsetMol(frag, mol, abset, true);
                if (bUncolor) {
                    // uncolor the fragment;
                    oechem.OEUncolorMol(frag);
                }
                final String smi = oechem.OEMolToSmiles(frag);
                // annotate the input molecule with the role information;
                for (OERole role : abset.GetRoles())
                    oechem.OEAddSDData(mol, role.GetName(), smi);

            }
            if (regions == 0) {
                String name = mol.GetTitle();
                if (name.length() == 0)
                    name = "Record " + irec;
                oechem.OEThrow.Warning(name + ": no perceived regions");
            }
            else {
                ++ototal;
                oechem.OEWriteMolecule(ofs, mol);
            }
        }
        if (irec == 0)
            oechem.OEThrow.Fatal("No records in input structure file to perceive");

        if (ototal ==0)
            oechem.OEThrow.Warning("No annotated structures generated");

        System.out.printf("Input molecules=%d, output annotated %smolecules=%d%n",
                          irec, ((bUncolor) ? "(uncolored) " : ""), ototal);

    }
    private static String interfaceData =
"!BRIEF  [ -uncolor ] [-i] <infile1> [-o] <infile2>\n" +
"!PARAMETER -i\n" +
"  !ALIAS -in\n" +
"  !ALIAS -input\n" +
"  !TYPE string\n" +
"  !REQUIRED true\n" +
"  !BRIEF Input structure file name\n" +
"  !KEYLESS 1\n" +
"!END\n" +
"!PARAMETER -o\n" +
"  !ALIAS -out\n" +
"  !ALIAS -output\n" +
"  !TYPE string\n" +
"  !REQUIRED true\n" +
"  !BRIEF Output SD file name\n" +
"  !KEYLESS 2\n" +
"!END\n" +
"!PARAMETER -uncolor\n" +
"  !ALIAS -u\n" +
"  !TYPE bool\n" +
"  !DEFAULT false\n" +
"  !BRIEF Uncolor output molecules\n" +
"!END\n";
}

See also

Matched Pair analysis and transformations

/*****************************************************************************
 * Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
 *****************************************************************************
 * Utility to perform a matched pair analysis on a set of structures
 *  and use the transformations discovered to alter a second set of structures
 * ---------------------------------------------------------------------------
 * MatchedPairTransform index_mols input_mols output_mols
 *
 * index_mols: filename of input molecules to analyze
 * input_mols: filename of molecules to transform based on analysis
 * output_mols: filename to collect transformed molecules
 *****************************************************************************/
package openeye.examples.oemedchem;

import openeye.oechem.*;
import openeye.oemedchem.*;

public class MatchedPairTransform {
    private static void Analyze(OEInterface itf) {
        // input structures to index;
        oemolistream ifsindex = new oemolistream();
        if (!ifsindex.open(itf.GetString("-index")))
            oechem.OEThrow.Fatal("Unable to open index file for reading: " + itf.GetString("-index"));

        // input structure(s) to transform;
        oemolistream ifsmols = new oemolistream();
        if (!ifsmols.open(itf.GetString("-input")))
            oechem.OEThrow.Fatal("Unable to open input file for reading: " + itf.GetString("-input"));

        // save output structure(s) to this file;
        oemolostream ofs = new oemolostream();
        if (!ofs.open(itf.GetString("-output")))
            oechem.OEThrow.Fatal("Unable to open output file for writing: " + itf.GetString("-output"));

        // create options class with defaults;
        OEMatchedPairAnalyzerOptions opts = new OEMatchedPairAnalyzerOptions();
        // setup options from command line
        if (!oemedchem.OESetupMatchedPairIndexOptions(opts, itf))
            oechem.OEThrow.Fatal("Error setting matched pair indexing options!");

        if (!opts.HasIndexableFragmentHeavyAtomRange())
            oechem.OEThrow.Info("Indexing all fragments");
        else
            System.out.printf("Using index range=%.1f-%.1f%%%n",
                              opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

        // request a specific context for the transform activity, here 0-bonds;
        int chemctxt = OEMatchedPairContext.Bond0;
        String askcontext = itf.GetString("-context");
        char ctxt = askcontext.charAt(0);
        switch (ctxt) {
        case '0':
            chemctxt = OEMatchedPairContext.Bond0;
            break;
        case '1':
            chemctxt = OEMatchedPairContext.Bond1;
            break;
        case '2':
            chemctxt = OEMatchedPairContext.Bond2;
            break;
        case '3':
            chemctxt = OEMatchedPairContext.Bond3;
            break;
        case 'a':
        case 'A':
            chemctxt = OEMatchedPairContext.AllBonds;
            break;
        default:
            oechem.OEThrow.Fatal("Invalid context specified: " + askcontext + ", only 0|1|2|3|A allowed");
            break;
        }

        // create indexing engine;
        OEMatchedPairAnalyzer mmp = new OEMatchedPairAnalyzer(opts);

        boolean verbose = itf.GetBool("-verbose");

        // add molecules to be indexed;
        OEGraphMol mol = new OEGraphMol();
        int record = 0;
        while (oechem.OEReadMolecule(ifsindex,mol)) {
            int status = mmp.AddMol(mol,++record);
            if (verbose && status != record)
                oechem.OEThrow.Info("Input structure not added to index, record=" + record +
                                    " status=" + oemedchem.OEMatchedPairIndexStatusName(status));
        }

        if (mmp.NumMols() == 0)
            oechem.OEThrow.Fatal("No records in input structure file for indexing");

        if (mmp.NumMatchedPairs() == 0)
            oechem.OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

        // return some status information;
        System.out.printf("indexed molecules=%d matched pairs=%d%n",
                          mmp.NumMols(),mmp.NumMatchedPairs());

        int minpairs = itf.GetInt("-minpairs");
        if (minpairs > 1)
            oechem.OEThrow.Info("Requiring at least " + minpairs + " matched pairs to apply transformations");

        int orec = 0;
        int ocnt = 0;
        int ototal = 0;
        while (oechem.OEReadMolecule(ifsmols,mol)) {
            ++orec ;
            ocnt = 0;
            for (OEMolBase outmol : oemedchem.OEMatchedPairApplyTransforms(mol, mmp, chemctxt, minpairs)) {
                ++ocnt ;
                oechem.OEWriteMolecule(ofs, outmol);
            }
            ototal += ocnt ;
            if (verbose && ocnt == 0) {
                // as minpairs increases, fewer transformed mols are generated - output if requested
                String name = mol.GetTitle();
                if (name.length() == 0)
                    name = "Record " + orec;
                oechem.OEThrow.Info(name + ": did not produce any output");
            }
        }
        if (orec == 0)
            oechem.OEThrow.Fatal("No records in input structure file to transform");

        if (ototal == 0)
            oechem.OEThrow.Fatal("No transformed structures generated");

        System.out.printf("Input molecules=%d Output molecules=%d%n", orec, ocnt);
    }

    public static void main (String[] args) {
        OEInterface itf = new OEInterface ();
        oechem.OEConfigure(itf, interfaceData);
        oemedchem.OEConfigureMatchedPairIndexOptions(itf);

        if (oechem.OEParseCommandLine (itf, args, "MatchedPairTransform")) {
            Analyze(itf);
        }
    }

    private static String interfaceData =
"!CATEGORY MatchedPairTransform\n" +
"    !CATEGORY I/O\n" +
"        !PARAMETER -index 1\n" +
"          !TYPE string\n" +
"          !REQUIRED true\n" +
"          !BRIEF Input filename of structures to index\n" +
"          !KEYLESS 1\n" +
"        !END\n" +
"        !PARAMETER -input 2\n" +
"          !ALIAS -i\n" +
"          !ALIAS -in\n" +
"          !TYPE string\n" +
"          !REQUIRED true\n" +
"          !BRIEF Input filename of structures to process based on matched pairs discovered from indexing\n" +
"          !KEYLESS 2\n" +
"        !END\n" +
"        !PARAMETER -output 3\n" +
"          !ALIAS -o\n" +
"          !ALIAS -out\n" +
"          !TYPE string\n" +
"          !REQUIRED true\n" +
"          !BRIEF Output filename\n" +
"          !KEYLESS 3\n" +
"        !END\n" +
"    !END\n" +
"    !CATEGORY options\n" +
"        !PARAMETER -context 1\n" +
"           !ALIAS -c\n" +
"           !TYPE string\n" +
"           !DEFAULT 0\n" +
"           !BRIEF chemistry context to use for the transformation [0|1|2|3|A]\n" +
"        !END\n" +
"        !PARAMETER -minpairs 2\n" +
"           !TYPE int\n" +
"           !DEFAULT 0\n" +
"           !BRIEF require at least -minpairs to apply the transformations (default: all)\n" +
"        !END\n" +
"        !PARAMETER -verbose 3\n" +
"           !TYPE bool\n" +
"           !DEFAULT 0\n" +
"           !BRIEF generate verbose output\n" +
"        !END\n" +
"    !END\n" +
"!END";
}

Matched Pair analysis and listing of transformations

/*****************************************************************************
 * Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
 *****************************************************************************
 * Utility to perform a matched pair analysis on a set of structures
 *  and dump a listing of the transformations derived from matched pairs found
 * ---------------------------------------------------------------------------
 * MatchedPairTransformList index_mols
 *
 * index_mols: filename of input molecules to analyze
 *****************************************************************************/
package openeye.examples.oemedchem;

import openeye.oechem.*;
import openeye.oemedchem.*;
import java.util.*;

public class MatchedPairTransformList {
    // simple internal class to rank transform output
    public static class MMPXform implements Comparable<MMPXform> {
        OEMatchedPairTransform xform;
        float avg;
        float std;
        int num;

        public MMPXform(OEMatchedPairTransform xfm,
                        float average, float stddev, int count) {
            xform = xfm;
            avg = average;
            std = stddev;
            num = count;
        }

        public int compareTo(MMPXform another) {
            // sort descending by absolute value of the average
            double lAvg = Math.abs(this.avg);
            double rAvg = Math.abs(another.avg);

            int retval = 0;
            if (lAvg > rAvg)
                retval = -1;
            else if (lAvg < rAvg)
                retval = 1;
            return retval;
        }
    }

    // compute some simple statistics for ranking
    private static float Average(List<Float> data) {
        Float avg = null;
        for (Float value : data) {
            if (avg == null)
                avg = value;
            else
                avg += value;
        }
        if (avg != null)
            avg = avg / (float)data.size();

        return avg;
    }
    private static float StdDev(List<Float> data) {
        if (data.size() <= 1)
            return 0.0f;

        float mean = Average(data);

        // calculate the sum of squares
        double sum = 0;
        for (Float value : data) {
            final double v = value - mean;
            sum += v * v;
        }
        return (float)Math.sqrt( sum / data.size() );
    }

    private static void MMPAnalyze(OEInterface itf) {
        // input structures to index;
        oemolistream ifs = new oemolistream();
        if (!ifs.open(itf.GetString("-input")))
            oechem.OEThrow.Fatal("Unable to open input file for reading: " + itf.GetString("-input"));

        // request a specific context for the transform activity, here 0-bonds;
        int chemctxt = OEMatchedPairContext.Bond0;
        String askcontext = itf.GetString("-context");
        char ctxt = askcontext.charAt(0);
        switch (ctxt) {
        case '0':
            chemctxt = OEMatchedPairContext.Bond0;
            break;
        case '1':
            chemctxt = OEMatchedPairContext.Bond1;
            break;
        case '2':
            chemctxt = OEMatchedPairContext.Bond2;
            break;
        case '3':
            chemctxt = OEMatchedPairContext.Bond3;
            break;
        case 'a':
        case 'A':
            chemctxt = OEMatchedPairContext.AllBonds;
            break;
        default:
            oechem.OEThrow.Fatal("Invalid context specified: " +
                                 askcontext + ", only 0|1|2|3|A allowed");
            break;
        }

        boolean bPrintTransforms = itf.GetBool("-printlist");
        // if a data field was specified, retreive the SD data field name
        String field = null;
        if (itf.HasString("-datafield"))
            field = itf.GetString("-datafield");

        // create options class with defaults;
        OEMatchedPairAnalyzerOptions opts = new OEMatchedPairAnalyzerOptions();
        // setup options from command line
        if (!oemedchem.OESetupMatchedPairIndexOptions(opts, itf))
            oechem.OEThrow.Fatal("Error setting matched pair indexing options!");

        if (!opts.HasIndexableFragmentHeavyAtomRange())
            oechem.OEThrow.Info("Indexing all fragments");
        else
            System.out.printf("Using index range=%.1f-%.1f%%%n",
                              opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

        if (field == null && !bPrintTransforms) {
            oechem.OEThrow.Info("Specify -datafield or -printlist, otherwise nothing to do!");
            return;
        }

        // create indexing engine;
        OEMatchedPairAnalyzer mmp = new OEMatchedPairAnalyzer(opts);

        // add molecules to be indexed;
        boolean bFoundData = false;
        OEGraphMol mol = new OEGraphMol();
        int record = 0;
        while (oechem.OEReadMolecule(ifs,mol)) {
            int status = mmp.AddMol(mol,++record);
            if (status != record) {
                System.out.printf("Error adding input structure to index, record=%d status=%s%n",
                                  record, oemedchem.OEMatchedPairIndexStatusName(status));
            }
            else if (field != null && oechem.OEHasSDData(mol,field)) {
                // validate that data field value is numeric
                Float dataValue = null;
                try {
                    dataValue = Float.valueOf(oechem.OEGetSDData(mol,field));
                }
                catch (NumberFormatException e) {
                    dataValue = null;
                }
                if (dataValue != null)
                    bFoundData = true;
                else
                    oechem.OEThrow.Fatal(Integer.toString(record) + ": Non-numeric data for field " + field +
                                         " found, " + oechem.OEGetSDData(mol,field));
            }
        }
        if (mmp.NumMols() == 0)
            oechem.OEThrow.Fatal("No records in input structure file were indexed");

        if (field != null && !bFoundData)
            oechem.OEThrow.Fatal("No data found for requested field, " + field);

        if (mmp.NumMatchedPairs() == 0)
            oechem.OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

        // controls how transforms are extracted (direction and allowed properties)
        int extractMode = (OEMatchedPairTransformExtractMode.Sorted +
                           OEMatchedPairTransformExtractMode.NoSMARTS);

        // now walk the transforms from the indexed matched pairs
        List<MMPXform> xforms = new ArrayList<MMPXform>();

        int xfmidx = 0;
        for (OEMatchedPairTransform mmpxform : oemedchem.OEMatchedPairGetTransforms(mmp,
                                                                                    chemctxt,extractMode)) {
            ++xfmidx;
            if (bPrintTransforms)
                System.out.printf("%2d %s%n",
                                  xfmidx,
                                  mmpxform.GetTransform());

            int mmpidx = 0;
            List<Float> prop = new ArrayList<Float>();
            for (OEMatchedPair mmppair : mmpxform.GetMatchedPairs()) {
                ++mmpidx;
                String mmpinfo = String.format("\t%2d: (%2d,%2d)",
                                               mmpidx,mmppair.FromIndex(),mmppair.ToIndex());

                for (String tag : mmppair.GetDataTags()) {
                    mmpinfo = mmpinfo + String.format(" %s=(%s,%s)",
                                                      tag,
                                                      mmppair.GetFromSDData(tag),
                                                      mmppair.GetToSDData(tag));
                    if (tag.equals(field)) {
                        Float fromValue = Float.valueOf(mmppair.GetFromSDData(tag));
                        Float toValue = Float.valueOf(mmppair.GetToSDData(tag));
                        prop.add(toValue - fromValue);
                    }

                }
                if (bPrintTransforms)
                    System.out.printf("%s%n",mmpinfo);
            }
            // skip if property not found
            if (!prop.isEmpty()) {
                // add
                MMPXform item = new MMPXform(mmpxform, (float)Average(prop), (float)StdDev(prop), prop.size());
                xforms.add(item);
            }
        }

        if (field == null)
            return;

        if (xforms.isEmpty())
            oechem.OEThrow.Error("No matched pairs found with " + field + " data");

        // sort the transforms by largest absolute delta property value
        Collections.sort(xforms);

        System.out.printf("%n*** Transforms sorted by delta %s%n", field);

        int idx = 0;
        for (MMPXform xfm : xforms) {
            ++idx;
            if ((extractMode & OEMatchedPairTransformExtractMode.NoSMARTS) != 0) {
                // not 'invertable' if SMARTS qualifiers were applied
                if (xfm.avg < 0.f) {
                    xfm.avg = -1.f * xfm.avg;
                    xfm.xform.Invert();
                }
            }
            System.out.printf("%2d %s=(avg=%5.2f,stdev=%.2f,num=%d) %s%n",idx,
                              field,
                              (float)xfm.avg,
                              (float)xfm.std,
                              xfm.num,
                              xfm.xform.GetTransform());
        }
    }

    public static void main (String[] args) {
        OEInterface itf = new OEInterface ();
        oechem.OEConfigure(itf, interfaceData);
        oemedchem.OEConfigureMatchedPairIndexOptions(itf);

        if (oechem.OEParseCommandLine (itf, args, "MatchedPairTransformList")) {
            MMPAnalyze(itf);
        }
    }

    private static String interfaceData =
"!CATEGORY MatchedPairTransformList\n" +
"    !CATEGORY I/O\n" +
"        !PARAMETER -input 1\n" +
"          !ALIAS -i\n" +
"          !TYPE string\n" +
"          !REQUIRED true\n" +
"          !BRIEF Input filename of structures to index\n" +
"          !KEYLESS 1\n" +
"        !END\n" +
"    !END\n" +
"    !CATEGORY options\n" +
"        !PARAMETER -context 1\n" +
"           !ALIAS -c\n" +
"           !TYPE string\n" +
"           !DEFAULT 0\n" +
"           !BRIEF chemistry context to use for the transformation [0|1|2|3|A]\n" +
"        !END\n" +
"        !PARAMETER -printlist 2\n" +
"           !ALIAS -p\n" +
"           !TYPE bool\n" +
"           !DEFAULT 1\n" +
"           !BRIEF print all transforms and matched pairs\n" +
"        !END\n" +
"        !PARAMETER -datafield 3\n" +
"           !ALIAS -d\n" +
"           !TYPE string\n" +
"           !BRIEF sort transforms based on delta change in this property\n" +
"        !END\n" +
"    !END\n" +
"!END";
}

Apply ChEMBL solubility transformations

/*****************************************************************************
 * Copyright (C) 2014 OpenEye Scientific Software, Inc.
 *****************************************************************************
 * Utility to apply ChEMBL18 solubility transforms to an input set of structures
 * ---------------------------------------------------------------------------
 * ChEMBLsolubility input_mols output_mols
 *
 * input_mols: filename of molecules to transform based on analysis
 * output_mols: filename to collect transformed molecules
 *****************************************************************************/
package openeye.examples.oemedchem;

import openeye.oechem.*;
import openeye.oemedchem.*;

public class ChEMBLsolubility {
    public static void main(String[] args) {
        OEInterface itf = new OEInterface(interfaceData, "ChEMBLsolubility", args);

        boolean verbose = itf.GetBool("-verbose");

        // input structure(s) to transform;
        oemolistream ifsmols = new oemolistream();
        if (!ifsmols.open(itf.GetString("-i")))
            oechem.OEThrow.Fatal("Unable to open file for reading: " + itf.GetString("-i"));

        // save output structure(s) to this file;
        oemolostream ofs = new oemolostream();
        if (!ofs.open(itf.GetString("-o")))
            oechem.OEThrow.Fatal("Unable to open file for writing: " + itf.GetString("-i"));

        // request a specific context for the transform activity, here 0-bonds;
        int chemctxt = OEMatchedPairContext.Bond0;
        String askcontext = itf.GetString("-context");
        char ctxt = askcontext.charAt(0);
        switch (ctxt) {
        case '0':
            chemctxt = OEMatchedPairContext.Bond0;
            break;
        case '2':
            chemctxt = OEMatchedPairContext.Bond2;
            break;
        default:
            oechem.OEThrow.Fatal("Invalid context specified: " + askcontext + ", only 0|2 allowed");
            break;
        }

        int minpairs = itf.GetInt("-minpairs");
        if (minpairs > 1)
            oechem.OEThrow.Info("Requiring at least " + minpairs + " matched pairs to apply transformations");

        int irec = 0;
        int ocnt = 0;
        int ototal = 0;
        OEGraphMol mol = new OEGraphMol();
        while (oechem.OEReadMolecule(ifsmols,mol)) {
            ++irec;
            oechem.OETheFunctionFormerlyKnownAsStripSalts(mol);

            ocnt = 0;
            for (OEMolBase outmol : oemedchem.OEApplyChEMBL18SolubilityTransforms(mol, chemctxt, minpairs)) {
                ++ocnt;
                oechem.OEWriteMolecule(ofs, outmol);
            }
            if (ocnt == 0) {
                String name = mol.GetTitle();
                if (name.length() == 0)
                    name = "Record " + irec;
                System.out.println(name + ": did not produce any output");
                System.out.println(oechem.OEMolToSmiles(mol));
            }
            else {
                ototal += ocnt;
                if (verbose)
                    System.out.printf("Record: %d transformation count=%d total mols=%d%n",
                                      irec, ocnt, ototal);
            }
        }

        if (irec == 0)
            oechem.OEThrow.Fatal("No records in input structure file to transform");

        if (ototal == 0)
            oechem.OEThrow.Warning("No transformed structures generated");
        else
            System.out.printf("Input molecules=%d Output molecules=%d%n",
                              irec, ototal);
    }
    private static String interfaceData =
"!BRIEF [-i] <infile1> [-o] <infile2> [ -verbose ] [ -context [0|2]]\n" +
"!PARAMETER -i\n" +
"  !ALIAS -in\n" +
"  !ALIAS -input\n" +
"  !TYPE string\n" +
"  !REQUIRED true\n" +
"  !BRIEF Input file name\n" +
"  !KEYLESS 1\n" +
"!END\n" +
"!PARAMETER -o\n" +
"  !ALIAS -out\n" +
"  !ALIAS -output\n" +
"  !TYPE string\n" +
"  !REQUIRED true\n" +
"  !BRIEF Output file name\n" +
"  !KEYLESS 2\n" +
"!END\n" +
"!PARAMETER -verbose\n" +
"  !ALIAS -v\n" +
"  !TYPE bool\n" +
"  !DEFAULT false\n" +
"  !BRIEF Verbose output\n" +
"!END\n" +
"!PARAMETER -context\n" +
"  !ALIAS -c\n" +
"  !TYPE string\n" +
"  !DEFAULT 0\n" +
"  !BRIEF Chemistry context for output\n" +
"!END\n" +
"!PARAMETER -minpairs\n" +
"  !TYPE int\n" +
"  !DEFAULT 0\n" +
"  !BRIEF require at least -minpairs to apply the transformations (default: all)\n" +
"!END\n";
}

MCS Similarity score reporting

/*****************************************************************************
 * Copyright (C) 2016 OpenEye Scientific Software, Inc.
 *****************************************************************************
 * Utility to index a set of structures and return the highest MCS similarity
 *  structures from an input query structure
 * ---------------------------------------------------------------------------
 * MCSFragDatabase [ -inmols index_mols | -indb index.mcsfrag ]
 *                 [ -query query_file ]
 *                 [ -type bond -limit 10 ]    // 10 Tanimoto bond similarity scores
 *                 [ -verbose 1 ]              // optional verbosity
 *                 [ -outdb outindex.mcsfrag ] // optional save index file
 *
 * index_mols: filename of input molecules to analyze
 * input.mcsfrag: filename of index file to load
 * query_file: filename of query to report MCS similarity results
 *****************************************************************************/
package openeye.examples.oemedchem;

import openeye.oechem.*;
import openeye.oemedchem.*;

public class MCSFragDatabase {
    private static void MCSFragAnalyze(OEInterface itf) {

        OEMCSFragDatabaseOptions mcsopt = new OEMCSFragDatabaseOptions();
        // setup options from command line
        if (!oemedchem.OESetupMCSFragDatabaseOptions(mcsopt, itf))
            oechem.OEThrow.Fatal("Error setting MCS fragment database options");

        // either:
        //   1) input structures to index
        //   2) MCS fragment database to load
        String indb = null;
        String outdb = null;
        String inmols = null;

        boolean verbose = itf.GetBool("-verbose");

        if (!itf.HasString("-inmols") && !itf.HasString("-indb"))
            oechem.OEThrow.Fatal("Must specify either -inmols or -indb");
        else if (itf.HasString("-inmols") && itf.HasString("-indb"))
            oechem.OEThrow.Fatal("Must specify only one of -inmols or -indb");
        else if (itf.HasString("-inmols") && !itf.HasString("-outdb") && !itf.HasString("-query"))
            oechem.OEThrow.Fatal("Must specify one of -outdb or -query with -inmols");
        else if (itf.HasString("-indb") && !itf.HasString("-query"))
            oechem.OEThrow.Fatal("Must specify -query with -indb");

        if (itf.HasString("-outdb")) {
            outdb = itf.GetString("-outdb");
            if (!oemedchem.OEIsMCSFragDatabaseFiletype(outdb))
                oechem.OEThrow.Fatal("Option -outdb must specify a .mcsfrag filename: " + outdb);
        }

        // initialize the MCS fragment index
        OEMCSFragDatabase mcsdb = new OEMCSFragDatabase(mcsopt);

        oemolistream ifsindex = new oemolistream();
        if (itf.HasString("-indb"))
        {
            indb = itf.GetString("-indb");
            if (!oemedchem.OEIsMCSFragDatabaseFiletype(indb))
                oechem.OEThrow.Fatal("Option -indb must specify a .mcsfrag filename: " + indb);
            if (verbose)
                oechem.OEThrow.Info("Loading index from " + indb);
            if (!oemedchem.OEReadMCSFragDatabase(indb, mcsdb))
                oechem.OEThrow.Fatal("Error deserializing MCS fragment database: " + indb);
            if (itf.HasString("-outdb"))
            {
                oechem.OEThrow.Warning("Option -outdb only meaningful with -inmols, ignoring");
                outdb = null;
            }
            if (mcsdb.NumMols() == 0)
                oechem.OEThrow.Fatal("Loaded empty index");

            // index instantiated - drop through to results
        }
        else if (itf.HasString("-inmols"))
        {
            inmols = itf.GetString("-inmols");
            if (!ifsindex.open(inmols))
                oechem.OEThrow.Fatal("Unable to open input file for indexing: " + inmols);
        }

        oemolistream ifsquery = null;
        if (itf.HasString("-query"))
        {
            ifsquery = new oemolistream();
            if (!ifsquery.open(itf.GetString("-query")))
                oechem.OEThrow.Fatal("Unable to open query file: " + itf.GetString("-query"));
        }

        if (inmols != null)
        {
            if (!mcsopt.HasIndexableFragmentHeavyAtomRange())
                oechem.OEThrow.Info("Indexing all fragments");
            else
                System.out.printf("Using index range=%.1f-%.1f%%\n",
                                  mcsopt.GetIndexableFragmentRangeMin(), mcsopt.GetIndexableFragmentRangeMax());

            if ((mcsopt.GetOptions() & OEMatchedPairOptions.AllCuts) != OEMatchedPairOptions.AllCuts)
                System.out.printf("Using nondefault cut option=%0x\n", (mcsopt.GetOptions() & OEMatchedPairOptions.AllCuts));
            if ((mcsopt.GetOptions() & OEMatchedPairOptions.IndexHSites) == 0)
                System.out.printf("Using nondefault HMember off option\n");

            // add molecules to be indexed;
            int maxrec = 0;
            if (itf.HasInt("-maxrec"))
                maxrec = itf.GetInt("-maxrec");
            int vstatus = ( verbose ? 1000 : 0 );

            int record = 0;
            OEGraphMol mol = new OEGraphMol();
            int indexed = 0;
            while (oechem.OEReadMolecule(ifsindex,mol)) {
                ++record;
                int status = mcsdb.AddMol(mol,record);
                if (status == record)
                    ++indexed;
                else if (verbose)
                    oechem.OEThrow.Info("Input structure not added to index, record=" + record + 
                                        " status=" + oemedchem.OEMatchedPairIndexStatusName(status));
                if (maxrec  > 0 && record >= maxrec)
                    break;  // maximum record limit reached
                if (vstatus > 0 && (record % vstatus) == 0)
                    System.out.printf("%d records processed, %d structures indexed\n", record, indexed);
            }
            ifsindex.close();

            if (record == 0)
                oechem.OEThrow.Fatal("No records in input structure file for indexing");

            // indexing complete - drop through to results
        }

        if (mcsdb.NumFragments() == 0)
            oechem.OEThrow.Fatal("No fragments found from indexing, use -fragGe,-fragLe options to extend indexable range");

        if (outdb != null)
        {
            if (verbose)
                oechem.OEThrow.Info("Writing index to " + outdb);
            if (!oemedchem.OEWriteMCSFragDatabase(outdb, mcsdb))
                oechem.OEThrow.Fatal("Error serializing MCS fragment database: " + outdb);
        }

        // return some status information about the index
        System.out.printf("indexed molecules=%d fragments=%d\n",
                          mcsdb.NumMols(),mcsdb.NumFragments());

        // check for (optional) query
        if (ifsquery == null)
            return;

        // process the query options
        int qmaxrec = 1;
        if (itf.HasInt("-qlim"))
            qmaxrec = itf.GetInt("-qlim");

        int numscores = 10;
        if (itf.HasInt("-limit"))
            numscores = itf.GetInt("-limit");

        int cnttype = OEMCSScoreType.BondCount;
        if (itf.HasString("-type"))
        {
            if (itf.GetString("-type").contains("atom"))
                cnttype = OEMCSScoreType.AtomCount;
            else if (itf.GetString("-type").contains("bond"))
                cnttype = OEMCSScoreType.BondCount;
            else
                oechem.OEThrow.Warning("Ignoring unrecognized -type option, expecting atom or bond: " + itf.GetString("-type"));
        }

        // process the queries
        OEGraphMol qmol = new OEGraphMol();
        int qnum = 0;
        while (oechem.OEReadMolecule(ifsquery, qmol)) {
            ++qnum;

            int numhits = 0;
            for (OEMCSMolSimScore score : mcsdb.GetSortedScores(qmol, numscores, 0, 0, true, cnttype)) {
                if (numhits++ == 0)
                    System.out.printf("Query: %d: %s\n", qnum, qmol.GetTitle());

                System.out.printf("\trecord: %6d\ttanimoto_%s_score: %.2f\tmcs_core: %s\n",
                                  score.GetIdx(), 
                                  ((cnttype == OEMCSScoreType.BondCount) ? "bond" : "atom" ), 
                                  score.GetScore(), 
                                  score.GetMCSCore());
            }
            if (numhits == 0)
                oechem.OEThrow.Warning("Query: " + qnum + ": " + qmol.GetTitle() + " - no hits");

            if (qmaxrec > 0 && qnum >= qmaxrec)
                break;
        }
        ifsquery.close();

        if (qnum == 0)
            oechem.OEThrow.Fatal("Error reading query structure(s): " + itf.GetString("-query"));
    }

    public static void main (String[] args) {
        OEInterface itf = new OEInterface ();
        oechem.OEConfigure(itf, interfaceData);
        oemedchem.OEConfigureMCSFragDatabaseOptions(itf);

        if (oechem.OEParseCommandLine (itf, args, "MCSFragDatabase")) {
            MCSFragAnalyze(itf);
        }
    }

    private static String interfaceData =
"!CATEGORY MCSFragmentDatabase\n" +
"    !CATEGORY I/O\n" +
"        !PARAMETER -inmols 1\n" +
"          !TYPE string\n" +
"          !BRIEF Input filename of structure(s) to index\n" +
"        !END\n" +
"        !PARAMETER -indb 2\n" +
"          !TYPE string\n" +
"          !BRIEF Input filename of index file to load\n" +
"        !END\n" +
"        !PARAMETER -query 3\n" +
"          !ALIAS -q\n" +
"          !TYPE string\n" +
"          !BRIEF query structure file to report similarity results against\n" +
"        !END\n" +
"        !PARAMETER -outdb 4\n" +
"          !TYPE string\n" +
"          !BRIEF Output filename of serialized index\n" +
"        !END\n" +
"    !END\n" +
"    !CATEGORY options\n" +
"        !PARAMETER -type 1\n" +
"           !TYPE string\n" +
"           !DEFAULT bond\n" +
"           !BRIEF MCS core counts to use for reported scores: atom or bond\n" +
"        !END\n" +
"        !PARAMETER -limit 2\n" +
"           !TYPE int\n" +
"           !DEFAULT 10\n" +
"           !BRIEF report -limit scores for the query structure\n" +
"        !END\n" +
"        !PARAMETER -verbose 3\n" +
"           !TYPE bool\n" +
"           !DEFAULT 0\n" +
"           !BRIEF generate verbose output\n" +
"        !END\n" +
"        !PARAMETER -maxrec 4\n" +
"           !TYPE int\n" +
"           !DEFAULT 0\n" +
"           !BRIEF limit indexing to -maxrec records from the -inputmols structures\n" +
"        !END\n" +
"        !PARAMETER -qlim 5\n" +
"           !TYPE int\n" +
"           !DEFAULT 1\n" +
"           !BRIEF limit query processing to -qlim records from the -query structures\n" +
"        !END\n" +
"    !END\n" +
"!END";
}