OEMedChem Examples

Bemis Murcko perception

//*****************************************************************************
//* Copyright (C) 2014 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to fragment the input structures by Bemis-Murcko rules
//* ---------------------------------------------------------------------------
//* BemisMurckoPerception input_mols output_mols [-uncolor]
//*
//* input_mols: filename of molecules to fragment and uncolor
//* output_mols: filename of input structure with SDData of perceived regions
//* [-uncolor]: optional flag to request uncoloring of output fragment info
//*****************************************************************************
using OpenEye.OEChem;
using OpenEye.OEMedChem;
using System;

/// <summary>
/// Example program: perceives the Bemis-Murcko regions of each input molecule and
/// writes the molecule back out with one SD data entry per region role, whose
/// value is the SMILES of that region.
/// </summary>
public class BemisMurckoPerception
{
    /// <summary>
    /// Parses the command line, annotates every input molecule with its perceived
    /// regions and writes the annotated molecules to the output file. Molecules
    /// without any perceived region are reported with a warning and not written.
    /// </summary>
    /// <param name="args">Command-line arguments described by <c>interfaceData</c>.</param>
    public static void Main(string[] args)
    {
        OEInterface itf = new OEInterface(interfaceData, "BemisMurckoPerception", args);

        // flag on command line indicates uncoloring option or not;
        bool bUncolor = itf.GetBool("-uncolor");

        // input structure(s) to transform;
        oemolistream ifsmols = new oemolistream();
        if (!ifsmols.open(itf.GetString("-i")))
            OEChem.OEThrow.Fatal("Unable to open file for reading: " + itf.GetString("-i"));

        // save output structure(s) to this file;
        oemolostream ofs = new oemolostream();
        if (!ofs.open(itf.GetString("-o")))
            OEChem.OEThrow.Fatal("Unable to open file for writing: " + itf.GetString("-o"));
        // the annotations are stored as SD data, so the output format must carry it;
        if (!OEChem.OEIsSDDataFormat(ofs.GetFormat()))
            OEChem.OEThrow.Fatal("Output file format does not support SD data: " + itf.GetString("-o"));

        int irec = 0;    // number of input records read
        int ototal = 0;  // number of annotated molecules written
        OEGraphMol frag = new OEGraphMol();
        foreach (OEGraphMol mol in ifsmols.GetOEGraphMols())
        {
            ++irec;
            OEChem.OETheFunctionFormerlyKnownAsStripSalts(mol);

            int regions = 0;
            foreach (OEAtomBondSet abset in OEMedChem.OEGetBemisMurcko(mol))
            {
                ++regions;
                // create a fragment from the perceived region;
                OEChem.OESubsetMol(frag, mol, abset);
                if (bUncolor)
                {
                    // uncolor the fragment;
                    OEChem.OEUncolorMol(frag);
                }
                string smi = OEChem.OEMolToSmiles(frag);
                // annotate the input molecule with the role information;
                foreach (OERole role in abset.GetRoles())
                    OEChem.OEAddSDData(mol, role.GetName(), smi);
            }

            if (regions == 0)
            {
                string name = mol.GetTitle();
                if (name.Length == 0)
                    name = "Record " + irec.ToString();
                OEChem.OEThrow.Warning(name + ": no perceived regions");
            }
            else
            {
                ++ototal;
                OEChem.OEWriteMolecule(ofs, mol);
            }
        }

        // close the streams explicitly rather than relying on finalization at exit;
        ofs.close();
        ifsmols.close();

        if (irec == 0)
            OEChem.OEThrow.Fatal("No records in input structure file to perceive");

        if (ototal == 0)
            OEChem.OEThrow.Warning("No annotated structures generated");

        Console.WriteLine("Input molecules={0:D}, output annotated {1:S}molecules={2:D}",
                          irec, ((bUncolor) ? "(uncolored) " : ""), ototal);
    }

    // command-line interface definition consumed by OEInterface in Main
    private static string interfaceData = @"
!BRIEF [-i] <infile1> [-o] <infile2> [ -uncolor ]
!PARAMETER -i
  !ALIAS -in
  !ALIAS -input
  !TYPE string
  !REQUIRED true
  !BRIEF Input structure file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !ALIAS -output
  !TYPE string
  !REQUIRED true
  !BRIEF Output SD file name
  !KEYLESS 2
!END
!PARAMETER -uncolor
  !ALIAS -u
  !TYPE bool
  !DEFAULT false
  !BRIEF Uncolor output molecules
!END
";
}

See also

Matched Pair analysis and transformations

//*****************************************************************************
//* Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to perform a matched pair analysis on a set of structures
//*  and use the transformations discovered to alter a second set of structures
//* ---------------------------------------------------------------------------
//* MatchedPairTransform index_mols input_mols output_mols
//*
//* index_mols: filename of input molecules to analyze
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//*****************************************************************************
using OpenEye.OEChem;
using OpenEye.OEMedChem;
using System;

/// <summary>
/// Example program: indexes one set of molecules for matched pairs and applies the
/// transformations found to a second set of molecules, writing every transformed
/// molecule to the output file.
/// </summary>
public class MatchedPairTransform
{
    /// <summary>
    /// Runs the analysis: indexes the molecules from <c>-index</c>, then applies
    /// the matched pair transforms to each molecule from <c>-input</c> and writes
    /// the products to <c>-output</c>.
    /// </summary>
    /// <param name="itf">Parsed command-line interface.</param>
    private static void Analyze(OEInterface itf)
    {
        // input structures to index;
        oemolistream ifsindex = new oemolistream();
        if (!ifsindex.open(itf.GetString("-index")))
            OEChem.OEThrow.Fatal("Unable to open index file for reading: " + itf.GetString("-index"));

        // input structure(s) to transform;
        oemolistream ifsmols = new oemolistream();
        if (!ifsmols.open(itf.GetString("-input")))
            OEChem.OEThrow.Fatal("Unable to open input file for reading: " + itf.GetString("-input"));

        // save output structure(s) to this file;
        oemolostream ofs = new oemolostream();
        if (!ofs.open(itf.GetString("-output")))
            OEChem.OEThrow.Fatal("Unable to open output file for writing: " + itf.GetString("-output"));

        // create options class with defaults;
        OEMatchedPairAnalyzerOptions opts = new OEMatchedPairAnalyzerOptions();
        // setup options from command line
        if (!OEMedChem.OESetupMatchedPairIndexOptions(opts, itf))
            OEChem.OEThrow.Fatal("Error setting matched pair indexing options!");

        if (!itf.HasFloat("-fragGe") && !itf.HasFloat("-fragLe"))
            OEChem.OEThrow.Info("Using default indexing range");
        else
            Console.WriteLine("Setting index range={0:F}-{1:F}%",
                              opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

        // request a specific context for the transform activity, here 0-bonds;
        int chemctxt = OEMatchedPairContext.Bond0;
        String askcontext = itf.GetString("-context");
        // an empty value falls through to the "invalid context" branch instead of
        // raising an index-out-of-range exception;
        char ctxt = (askcontext.Length > 0) ? askcontext[0] : '\0';
        switch (ctxt)
        {
            case '0':
                chemctxt = OEMatchedPairContext.Bond0;
                break;
            case '1':
                chemctxt = OEMatchedPairContext.Bond1;
                break;
            case '2':
                chemctxt = OEMatchedPairContext.Bond2;
                break;
            case '3':
                chemctxt = OEMatchedPairContext.Bond3;
                break;
            case 'a':
            case 'A':
                chemctxt = OEMatchedPairContext.AllBonds;
                break;
            default:
                OEChem.OEThrow.Fatal("Invalid context specified: " + askcontext + ", only 0|1|2|3|A allowed");
                break;
        }

        // create indexing engine;
        OEMatchedPairAnalyzer mmp = new OEMatchedPairAnalyzer(opts);

        bool verbose = itf.GetBool("-verbose");

        // add molecules to be indexed;
        int record = 0;
        foreach (OEGraphMol mol in ifsindex.GetOEGraphMols())
        {
            // a status other than the record id passed in is reported as "not added";
            int status = mmp.AddMol(mol, ++record);
            if (verbose && status != record)
                OEChem.OEThrow.Info("Input structure not added to index, record=" + record +
                                    " status=" + OEMedChem.OEMatchedPairIndexStatusName(status));
        }
        ifsindex.close();

        if (mmp.NumMols() == 0)
            OEChem.OEThrow.Fatal("No records in input structure file for indexing");

        if (mmp.NumMatchedPairs() == 0)
            OEChem.OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

        // return some status information;
        Console.WriteLine("indexed molecules={0:D} matched pairs={1:D}",
                          mmp.NumMols(), mmp.NumMatchedPairs());

        uint minpairs = (uint)itf.GetInt("-minpairs");
        if (minpairs > 1)
            Console.WriteLine("Requiring at least {0:D} matched pairs to apply transformations", minpairs);

        int orec = 0;    // number of input records read
        int ototal = 0;  // number of transformed molecules written overall
        foreach (OEGraphMol mol in ifsmols.GetOEGraphMols())
        {
            ++orec;
            int ocnt = 0;  // products generated from this input molecule
            foreach (OEMolBase outmol in OEMedChem.OEMatchedPairApplyTransforms(mol, mmp, chemctxt, minpairs))
            {
                ++ocnt;
                OEChem.OEWriteMolecule(ofs, outmol);
            }
            ototal += ocnt;
            if (verbose && ocnt == 0)
            {
                String name = mol.GetTitle();
                if (name.Length == 0)
                    name = "Record " + orec.ToString();
                OEChem.OEThrow.Info(name + ": did not produce any output");
            }
        }

        // close the streams explicitly rather than relying on finalization at exit;
        ofs.close();
        ifsmols.close();

        if (orec == 0)
            OEChem.OEThrow.Fatal("No records in input structure file to transform");

        if (ototal == 0)
            OEChem.OEThrow.Fatal("No transformed structures generated");

        // report the overall total, not the count of the last input molecule;
        Console.WriteLine("Input molecules={0:D} Output molecules={1:D}", orec, ototal);
    }

    /// <summary>
    /// Program entry point: configures the interface with this program's options
    /// plus the matched pair indexing options, then runs the analysis when the
    /// command line parses successfully.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    public static void Main(String[] args)
    {
        OEInterface itf = new OEInterface();
        OEChem.OEConfigure(itf, InterfaceData);
        OEMedChem.OEConfigureMatchedPairIndexOptions(itf);

        if (OEChem.OEParseCommandLine(itf, args, "MatchedPairTransform"))
            Analyze(itf);
    }

    // command-line interface definition passed to OEConfigure in Main
    private static String InterfaceData = @"
!CATEGORY MatchedPairTransform

    !CATEGORY I/O
        !PARAMETER -index 1
          !TYPE string
          !REQUIRED true
          !BRIEF Input filename of structures to index
          !KEYLESS 1
        !END

        !PARAMETER -input 2
          !ALIAS -i
          !ALIAS -in
          !TYPE string
          !REQUIRED true
          !BRIEF Input filename of structures to process based on matched pairs discovered from indexing
          !KEYLESS 2
        !END

        !PARAMETER -output 3
          !ALIAS -o
          !ALIAS -out
          !TYPE string
          !REQUIRED true
          !BRIEF Output filename
          !KEYLESS 3
        !END
    !END

    !CATEGORY options
        !PARAMETER -context 1
           !ALIAS -c
           !TYPE string
           !DEFAULT 0
           !BRIEF chemistry context to use for the transformation [0|1|2|3|A]
        !END
        !PARAMETER -minpairs 2
           !TYPE int
           !DEFAULT 0
           !BRIEF require at least -minpairs to apply the transformations (default: all)
        !END
        !PARAMETER -verbose 3
           !TYPE bool
           !DEFAULT 0
           !BRIEF generate verbose output
        !END
    !END
!END
";
}

Matched Pair analysis and listing of transformations

//*****************************************************************************
//* Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to perform a matched pair analysis on a set of structures
//*  and dump a listing of the transformations derived from matched pairs found
//* ---------------------------------------------------------------------------
//* MatchedPairTransformList index_mols
//*
//* index_mols: filename of input molecules to analyze
//*****************************************************************************
using OpenEye.OEChem;
using OpenEye.OEMedChem;
using System;
using System.Collections.Generic;

/// <summary>
/// Example program: indexes a set of molecules for matched pairs, optionally
/// prints every transform with its matched pairs, and, when an SD data field is
/// given, lists the transforms ranked by the average change in that field.
/// </summary>
public class MatchedPairTransformList
{
    // simple internal class to rank transform output
    public class MMPXform : IComparable<MMPXform>
    {
        public OEMatchedPairTransform xform;  // the transform being ranked
        public float avg;                     // average property delta over its pairs
        public float std;                     // standard deviation of the deltas
        public int num;                       // number of deltas contributing

        public MMPXform(OEMatchedPairTransform xfm,
                        float average, float stddev, int count)
        {
            xform = xfm;
            avg = average;
            std = stddev;
            num = count;
        }

        /// <summary>
        /// Orders transforms descending by the absolute value of the average.
        /// </summary>
        /// <returns>-1, 0 or 1 following the IComparable convention.</returns>
        public int CompareTo(MMPXform another)
        {
            // IComparable convention: any instance sorts after null
            if (another == null)
                return 1;

            // sort descending by absolute value of the average
            double lAvg = Math.Abs(this.avg);
            double rAvg = Math.Abs(another.avg);

            int retval = 0;
            if (lAvg > rAvg)
                retval = -1;
            else if (lAvg < rAvg)
                retval = 1;
            return retval;
        }
    }

    /// <summary>
    /// Returns the arithmetic mean of the values, or 0 for an empty list.
    /// </summary>
    private static float getAverage(List<float> floatList)
    {
        if (floatList.Count == 0)
            return 0;

        // accumulate in double to limit rounding error on long lists
        double sum = 0;
        foreach (float value in floatList)
        {
            sum += value;
        }
        return (float)(sum / floatList.Count);
    }

    /// <summary>
    /// Returns the population (not sample) standard deviation of the values, or 0
    /// when there are fewer than two values.
    /// </summary>
    private static float getStdDev(List<float> floatList)
    {
        if (floatList.Count <= 1)
            return 0;

        double sum = 0;
        foreach (float value in floatList)
        {
            sum += value;
        }
        double mean = sum / floatList.Count;

        // two-pass form: the sum of squared deviations can never be negative, so
        // the square root cannot produce NaN from rounding error
        double sumOfSquaredDeviations = 0;
        foreach (float value in floatList)
        {
            double deviation = value - mean;
            sumOfSquaredDeviations += deviation * deviation;
        }
        return (float)Math.Sqrt(sumOfSquaredDeviations / floatList.Count); // population, not sample
    }

    /// <summary>
    /// Parses SD data text as a float using the invariant culture, so the result
    /// does not depend on the machine's regional settings.
    /// </summary>
    /// <returns>true when <paramref name="text"/> is numeric.</returns>
    private static bool tryParseFloat(String text, out float value)
    {
        return float.TryParse(text,
                              System.Globalization.NumberStyles.Float |
                              System.Globalization.NumberStyles.AllowThousands,
                              System.Globalization.CultureInfo.InvariantCulture,
                              out value);
    }

    /// <summary>
    /// Indexes the input molecules, walks the transforms derived from the matched
    /// pairs and prints them; with <c>-datafield</c> also prints the transforms
    /// sorted by the average delta of that field.
    /// </summary>
    /// <param name="itf">Parsed command-line interface.</param>
    private static void MMPAnalyze(OEInterface itf)
    {
        // input structures to index;
        oemolistream ifs = new oemolistream();
        if (!ifs.open(itf.GetString("-input")))
            OEChem.OEThrow.Fatal("Unable to open index file for reading: " + itf.GetString("-input"));

        // request a specific context for the transform activity, here 0-bonds;
        int chemctxt = OEMatchedPairContext.Bond0;
        String askcontext = itf.GetString("-context");
        // an empty value falls through to the "invalid context" branch instead of
        // raising an index-out-of-range exception;
        char ctxt = (askcontext.Length > 0) ? askcontext[0] : '\0';
        switch (ctxt)
        {
            case '0':
                chemctxt = OEMatchedPairContext.Bond0;
                break;
            case '1':
                chemctxt = OEMatchedPairContext.Bond1;
                break;
            case '2':
                chemctxt = OEMatchedPairContext.Bond2;
                break;
            case '3':
                chemctxt = OEMatchedPairContext.Bond3;
                break;
            case 'a':
            case 'A':
                chemctxt = OEMatchedPairContext.AllBonds;
                break;
            default:
                OEChem.OEThrow.Fatal("Invalid context specified: " +
                                     askcontext + ", only 0|1|2|3|A allowed");
                break;
        }

        bool bPrintTransforms = itf.GetBool("-printlist");
        // if a data field was specified, retrieve the SD data field name
        String field = null;
        if (itf.HasString("-datafield"))
            field = itf.GetString("-datafield");

        // create options class with defaults;
        OEMatchedPairAnalyzerOptions opts = new OEMatchedPairAnalyzerOptions();
        // setup options from command line
        if (!OEMedChem.OESetupMatchedPairIndexOptions(opts, itf))
            OEChem.OEThrow.Fatal("Error setting matched pair indexing options!");

        if (!itf.HasFloat("-fragGe") && !itf.HasFloat("-fragLe"))
            OEChem.OEThrow.Info("Using default indexing range");
        else
            Console.WriteLine("Setting index range={0:F}-{1:F}%",
                              opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

        if (field == null && !bPrintTransforms)
        {
            OEChem.OEThrow.Info("Specify -datafield or -printlist, otherwise nothing to do!");
            ifs.close();
            return;
        }

        // create indexing engine;
        OEMatchedPairAnalyzer mmp = new OEMatchedPairAnalyzer(opts);

        // add molecules to be indexed;
        bool bFoundData = false;
        int record = 0;
        foreach (OEGraphMol mol in ifs.GetOEGraphMols())
        {
            // a status other than the record id passed in is reported as an error;
            int status = mmp.AddMol(mol, ++record);
            if (status != record)
                Console.WriteLine("Error adding input structure to index, record={0:D} status={1:S}",
                                  record, OEMedChem.OEMatchedPairIndexStatusName(status));
            else if (field != null && OEChem.OEHasSDData(mol, field))
            {
                // validate that data field value is numeric
                String data = OEChem.OEGetSDData(mol, field);
                float ignored;
                if (tryParseFloat(data, out ignored))
                    bFoundData = true;
                else
                    OEChem.OEThrow.Fatal(Convert.ToString(record) + ": Non-numeric data for field " + field +
                                         " found, " + data);
            }
        }
        // close the stream explicitly rather than relying on finalization at exit;
        ifs.close();

        if (mmp.NumMols() == 0)
            OEChem.OEThrow.Fatal("No records in input structure file for indexing");

        if (field != null && !bFoundData)
            OEChem.OEThrow.Fatal("No data found for requested field, " + field);

        if (mmp.NumMatchedPairs() == 0)
            OEChem.OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

        // controls how transforms are extracted (direction and allowed properties)
        uint extractMode = (OEMatchedPairTransformExtractMode.Sorted +
                            OEMatchedPairTransformExtractMode.NoSMARTS);

        // now walk the transforms from the indexed matched pairs
        List<MMPXform> xforms = new List<MMPXform>();

        int xfmidx = 0;
        foreach (OEMatchedPairTransform mmpxform in OEMedChem.OEMatchedPairGetTransforms(mmp,
                                                                                         chemctxt, extractMode))
        {
            ++xfmidx;
            if (bPrintTransforms)
                Console.WriteLine("{0:D} {1}", xfmidx, mmpxform.GetTransform());

            int mmpidx = 0;
            List<float> prop = new List<float>();  // deltas (to - from) of the requested field
            foreach (OEMatchedPair mmppair in mmpxform.GetMatchedPairs())
            {
                ++mmpidx;
                String mmpinfo = String.Format("\t{0:D}: ({1:D},{2:D})",
                                               mmpidx, mmppair.FromIndex(), mmppair.ToIndex());

                foreach (String tag in mmppair.GetDataTags())
                {
                    String fromData = mmppair.GetFromSDData(tag);
                    String toData = mmppair.GetToSDData(tag);
                    mmpinfo = mmpinfo + String.Format(" {0}=({1},{2})",
                                                      tag,
                                                      fromData,
                                                      toData);
                    if (tag.Equals(field))
                    {
                        // only pairs with numeric data on both sides contribute a
                        // delta; anything else is skipped instead of throwing
                        float fromValue;
                        float toValue;
                        if (tryParseFloat(fromData, out fromValue) &&
                            tryParseFloat(toData, out toValue))
                            prop.Add(toValue - fromValue);
                    }
                }
                if (bPrintTransforms)
                    Console.WriteLine(mmpinfo);
            }
            // skip if property not found
            if (prop.Count != 0)
            {
                // add
                MMPXform item = new MMPXform(mmpxform,
                                             getAverage(prop),
                                             getStdDev(prop),
                                             prop.Count);
                xforms.Add(item);
            }
        }
        if (field == null)
            return;

        if (xforms.Count == 0)
        {
            OEChem.OEThrow.Error("No matched pairs found with " + field + " data");
            // nothing to rank, so do not print an empty sorted listing
            return;
        }

        // sort the transforms by largest absolute delta property value
        xforms.Sort();

        Console.WriteLine("\n*** Transforms sorted by delta {0}", field);

        int idx = 0;
        foreach (MMPXform xfm in xforms)
        {
            ++idx;
            if ((extractMode & OEMatchedPairTransformExtractMode.NoSMARTS) != 0)
            {
                // not 'invertable' if SMARTS qualifiers were applied
                if (xfm.avg < 0.0F)
                {
                    // report the transform in the direction of a positive delta
                    xfm.avg = -1.0F * xfm.avg;
                    xfm.xform.Invert();
                }
            }
            Console.WriteLine("{0,2:D} {1}=(avg={2:F2},stdev={3:F2},num={4:D}) {5}", idx,
                              field,
                              xfm.avg,
                              xfm.std,
                              xfm.num,
                              xfm.xform.GetTransform());
        }
    }

    /// <summary>
    /// Program entry point: configures the interface with this program's options
    /// plus the matched pair indexing options, then runs the analysis when the
    /// command line parses successfully.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    public static void Main(String[] args)
    {
        OEInterface itf = new OEInterface();
        OEChem.OEConfigure(itf, InterfaceData);
        OEMedChem.OEConfigureMatchedPairIndexOptions(itf);

        if (OEChem.OEParseCommandLine(itf, args, "MatchedPairTransformList"))
            MMPAnalyze(itf);
    }

    // command-line interface definition passed to OEConfigure in Main
    private static String InterfaceData = @"
!CATEGORY MatchedPairTransformList

    !CATEGORY I/O
        !PARAMETER -input 1
          !ALIAS -i
          !TYPE string
          !REQUIRED true
          !BRIEF Input filename of structures to index
          !KEYLESS 1
        !END
    !END

    !CATEGORY options
        !PARAMETER -context 1
           !ALIAS -c
           !TYPE string
           !DEFAULT 0
           !BRIEF chemistry context to use for the transformation [0|1|2|3|A]
        !END

        !PARAMETER -printlist 2
           !ALIAS -p
           !TYPE bool
           !DEFAULT 1
           !BRIEF print all transforms and matched pairs
        !END

        !PARAMETER -datafield 3
           !ALIAS -d
           !TYPE string
           !BRIEF sort transforms based on delta change in this property
        !END
    !END
!END
";
}

Apply ChEMBL solubility transformations

//*****************************************************************************
//* Copyright (C) 2014 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to apply ChEMBL18 solubility transforms to an input set of structures
//* ---------------------------------------------------------------------------
//* ChEMBLsolubility input_mols output_mols
//*
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//*****************************************************************************
using OpenEye.OEChem;
using OpenEye.OEMedChem;
using System;

/// <summary>
/// Example program: applies the ChEMBL18 solubility transforms to each input
/// molecule and writes every transformed molecule to the output file.
/// </summary>
public class ChEMBLsolubility
{
    /// <summary>
    /// Parses the command line, transforms every input molecule and reports how
    /// many molecules were read and written.
    /// </summary>
    /// <param name="args">Command-line arguments described by <c>interfaceData</c>.</param>
    public static void Main(String[] args)
    {
        OEInterface itf = new OEInterface(interfaceData, "ChEMBLsolubility", args);

        bool verbose = itf.GetBool("-verbose");

        // input structure(s) to transform;
        oemolistream ifsmols = new oemolistream();
        if (!ifsmols.open(itf.GetString("-i")))
            OEChem.OEThrow.Fatal("Unable to open file for reading: " + itf.GetString("-i"));

        // save output structure(s) to this file;
        oemolostream ofs = new oemolostream();
        if (!ofs.open(itf.GetString("-o")))
            OEChem.OEThrow.Fatal("Unable to open file for writing: " + itf.GetString("-o"));

        // request a specific context for the transform activity, here 0-bonds;
        int chemctxt = OEMatchedPairContext.Bond0;
        String askcontext = itf.GetString("-context");
        // an empty value falls through to the "invalid context" branch instead of
        // raising an index-out-of-range exception;
        char ctxt = (askcontext.Length > 0) ? askcontext[0] : '\0';
        switch (ctxt)
        {
            case '0':
                chemctxt = OEMatchedPairContext.Bond0;
                break;
            case '2':
                chemctxt = OEMatchedPairContext.Bond2;
                break;
            default:
                OEChem.OEThrow.Fatal("Invalid context specified: " + askcontext + ", only 0|2 allowed");
                break;
        }

        uint minpairs = (uint)itf.GetInt("-minpairs");
        if (minpairs > 1)
            Console.WriteLine("Requiring at least {0:D} matched pairs to apply transformations", minpairs);

        int irec = 0;    // number of input records read
        int ototal = 0;  // number of transformed molecules written overall
        foreach (OEGraphMol mol in ifsmols.GetOEGraphMols())
        {
            ++irec;
            OEChem.OETheFunctionFormerlyKnownAsStripSalts(mol);

            int ocnt = 0;  // products generated from this input molecule
            foreach (OEMolBase outmol in OEMedChem.OEApplyChEMBL18SolubilityTransforms(mol, chemctxt, minpairs))
            {
                ++ocnt;
                OEChem.OEWriteMolecule(ofs, outmol);
            }

            if (ocnt == 0)
            {
                String name = mol.GetTitle();
                if (name.Length == 0)
                    name = "Record " + irec.ToString();
                Console.WriteLine(name + ": did not produce any output");
                Console.WriteLine(OEChem.OEMolToSmiles(mol));
            }
            else
            {
                ototal += ocnt;
                if (verbose)
                    Console.WriteLine("Record: {0:D} transformation count={1:D} total mols={2:D}",
                                      irec, ocnt, ototal);
            }
        }

        // close the streams explicitly rather than relying on finalization at exit;
        ofs.close();
        ifsmols.close();

        if (irec == 0)
            OEChem.OEThrow.Fatal("No records in input structure file to transform");

        if (ototal == 0)
            OEChem.OEThrow.Warning("No transformed structures generated");
        else
            Console.WriteLine("Input molecules={0:D} Output molecules={1:D}",
                              irec, ototal);
    }

    // command-line interface definition consumed by OEInterface in Main
    private static String interfaceData = @"
!BRIEF [-i] <infile1> [-o] <infile2> [ -verbose ] [ -context [0|2]]
!PARAMETER -i
  !ALIAS -in
  !ALIAS -input
  !TYPE string
  !REQUIRED true
  !BRIEF Input file name
  !KEYLESS 1
!END
!PARAMETER -o
  !ALIAS -out
  !ALIAS -output
  !TYPE string
  !REQUIRED true
  !BRIEF Output file name
  !KEYLESS 2
!END
!PARAMETER -verbose
  !ALIAS -v
  !TYPE bool
  !DEFAULT false
  !BRIEF Verbose output
!END
!PARAMETER -context
  !ALIAS -c
  !TYPE string
  !DEFAULT 0
  !BRIEF Chemistry context for output
!END
!PARAMETER -minpairs 2
   !TYPE int
   !DEFAULT 0
   !BRIEF require at least -minpairs to apply the transformations (default: all)
!END";
}