OEMedChem Examples

Bemis Murcko perception

//*****************************************************************************
//* Copyright (C) 2014 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to fragment the input structures by Bemis-Murcko rules
//* ---------------------------------------------------------------------------
//* BemisMurckoPerception -i input_mols -o output_mols [-uncolor]
//*
//* input_mols: filename of molecules to fragment and uncolor
//* output_mols: filename of input structure with SDData of perceived regions
//* [-uncolor]: optional flag to request uncoloring of output fragment info
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "BemisMurckoPerception.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  // optional uncolor flag
  const bool bUncolor = itf.Get<bool>("-uncolor");

  // input structure(s) to transform;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open file for reading: %s",
                  itf.Get<std::string>("-i").c_str());

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open file for writing: %s",
                  itf.Get<std::string>("-o").c_str());

  if (!OEIsSDDataFormat(ofs.GetFormat()))
    OEThrow.Fatal("Output file format does not support SD data: %s",
                  itf.Get<std::string>("-o").c_str());

  int irec = 0;
  int ototal = 0;
  OEGraphMol mol, frag;
  while (OEReadMolecule(ifs,mol))
  {
    ++irec;
    OETheFunctionFormerlyKnownAsStripSalts(mol);

    int regions = 0;
    for (OEIter<OEAtomBondSet> absets = OEGetBemisMurcko(mol); absets; ++absets)
    {
      OEAtomBondSet &abset = absets;
      ++regions;
      // create a fragment from the perceived region;
      OESubsetMol(frag, mol, abset);
      if (bUncolor)
      {
        // uncolor the fragment;
        OEUncolorMol(frag);
      }
      std::string smi = OEMolToSmiles(frag);
      // annotate the input molecule with the role information;
      for (OEIter<const OERole> role = abset.GetRoles(); role; ++role)
        OEAddSDData(mol, role->GetName(), smi);

    }
    if (regions == 0)
    {
      std::string name = mol.GetTitle();
      if (name.length() == 0)
        name = "Record " + OENumberToString(irec);
      OEThrow.Warning(name + ": no perceived regions");
    }
    else
    {
      ++ototal;
      OEWriteMolecule(ofs, mol);
    }
  }
  if (irec == 0)
    OEThrow.Fatal("No records in input structure file to perceive");

  if (ototal ==0)
    OEThrow.Warning("No annotated structures generated");

  printf("Input molecules=%d, output annotated %smolecules=%d",
         irec, ((bUncolor) ? "(uncolored) " : ""), ototal);

  return 0;
}

See also

Matched Pair analysis and transformations

//*****************************************************************************
//* Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to perform a matched pair analysis on a set of structures
//*  and use the transformations discovered to alter a second set of structures
//* ---------------------------------------------------------------------------
//* MatchedPairTransform [-index] index_mols [ -i ] input_mols [ -o ] output_mols
//*                      [-fragGe min%] [-fragLe max%] [-context 0|1|2|3|A] [-minpairs #]
//*
//* index_mols: filename of input molecules to analyze
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MatchedPairTransform.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMatchedPairIndexOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  // input structures to index;
  oemolistream ifsindex;
  if (!ifsindex.open(itf.Get<std::string>("-index")))
    OEThrow.Fatal("Unable to open index file for reading: " + itf.Get<std::string>("-index"));

  // input structure(s) to transform;
  oemolistream ifsmols;
  if (!ifsmols.open(itf.Get<std::string>("-input")))
    OEThrow.Fatal("Unable to open input file for reading: " + itf.Get<std::string>("-input"));

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-output")))
    OEThrow.Fatal("Unable to open output file for writing: " + itf.Get<std::string>("-output"));

  // create options class with defaults;
  OEMatchedPairAnalyzerOptions opts;
  // apply requested command line options
  if (!OESetupMatchedPairIndexOptions(opts, itf))
    OEThrow.Fatal("Failure in OESetupMatchedPairIndexOptions!");

  if (!itf.Has<float>("-fragGe") && !itf.Has<float>("-fragLe"))
    OEThrow.Info("Using default indexing range");
  else
    printf("Setting index range=%.1f-%.1f%%\n",
           opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt)
  {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '1':
    chemctxt = OEMatchedPairContext::Bond1;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  case '3':
    chemctxt = OEMatchedPairContext::Bond3;
    break;
  case 'a':
  case 'A':
    chemctxt = OEMatchedPairContext::AllBonds;
    break;
  default:
    OEThrow.Fatal("Invalid context specified:%c, only 0|1|2|3|A allowed",ctxt);
    break;
  }

  // create indexing engine;
  OEMatchedPairAnalyzer mmp(opts);

  bool verbose = itf.Get<bool>("-verbose");

  // add molecules to be indexed;
  int record = 0;
  OEGraphMol mol;
  unsigned int irec = 0;
  while (OEReadMolecule(ifsindex,mol))
  {
    ++irec;
    int status = mmp.AddMol(mol,++record);
    if (verbose && status != record)
      OEThrow.Info("Input structure not added to index, record=%d status=%s",
                   record, OEMatchedPairIndexStatusName(status));
  }

  if (irec == 0)
    OEThrow.Fatal("No records in input structure file for indexing");

  if (!mmp.NumMatchedPairs())
    OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend indexable range");

  // return some status information;
  printf("indexed molecules=%d matched pairs=%d\n",
         mmp.NumMols(),mmp.NumMatchedPairs());

  unsigned int minpairs = itf.Get<unsigned int>("-minpairs");
  if (minpairs > 1u)
    OEThrow.Info("Requiring at least %d matched pairs to apply transformations", minpairs);

  int orec = 0;
  int ocnt = 0;
  int ototal = 0;
  while(OEReadMolecule(ifsmols,mol))
  {
    ++orec ;
    ocnt = 0;
    for (OEIter<OEMolBase> outmol = OEMatchedPairApplyTransforms(mol, mmp, chemctxt, minpairs); outmol; ++outmol)
    {
      ++ocnt ;
      OEWriteMolecule(ofs, outmol);
    }
    ototal += ocnt ;
    if (verbose && ocnt == 0)
    {
      // as minpairs increases, fewer transformed mols are generated - output if requested
      std::string name = mol.GetTitle();
      if (name.empty())
        name = "Record " + OENumberToString(orec);
      OEThrow.Info("%s: did not produce any output", name.c_str());
    }
  }
  if (orec == 0)
    OEThrow.Fatal("No records in input structure file to transform");

  if (ototal == 0)
    OEThrow.Fatal("No transformed structures generated");

  printf("Input molecules=%d Output molecules=%d\n", orec, ototal);
  return ((!ototal) ? 1 : 0);
}

Matched Pair analysis and listing of transformations

//*****************************************************************************
//* Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
//*****************************************************************************
// * Utility to perform a matched pair analysis on a set of structures
// *  and dump a listing of the transformations derived from matched pairs found
// * ---------------------------------------------------------------------------
// * MatchedPairTransformList index_mols
// *
// * index_mols: filename of input molecules to analyze
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MatchedPairTransformList.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

// simple public class to collect/compute stats on matched pairs
class MMPXform
{
public:
  MMPXform(OEMatchedPairTransform xfm,
                        float average, float stddev, int count)
  {
    _xform = xfm;
    _avg = average;
    _std = stddev;
    _num = count;
  }
  MMPXform(const MMPXform &rhs)
  {
    *this = rhs;
  }
  MMPXform& operator=(const MMPXform &rhs)
  {
    if (this == &rhs)
      return *this;

    _xform = rhs._xform;
    _avg = rhs._avg;
    _std = rhs._std;
    _num = rhs._num;
    return *this;
  }

  // defined so we can use std::sort
  bool operator<(const MMPXform& cmp) const
  {
    float lAvg = ((_avg < 0.) ? -(_avg) : _avg);
    float rAvg = ((cmp._avg < 0.) ? -(cmp._avg) : cmp._avg);
    return (lAvg > rAvg); // note: largest->smallest
  }
  const OEMatchedPairTransform& Xform() const { return _xform; }
  OEMatchedPairTransform& GetXform()          { return _xform; }
  float Average() const                       { return _avg;   }
  bool SetAverage(float avg)                  { _avg = avg; return true; }
  float StdDev() const                        { return _std;   }
  int Num() const                             { return _num;   }

  OEMatchedPairTransform _xform;
  float _avg;
  float _std;
  int   _num;
};

// compute some simple statistics for ranking
static float Average(const std::vector<float> data)
{
  if (data.empty())
    return 0.0f;
  if (data.size() == 1)
    return data[0];

  float avg = 0.0f;
  for(std::vector<float>::const_iterator elt = data.begin();
      elt != data.end();
      ++elt)
    avg += *elt;
  return (avg / float(data.size()));
}
static float StdDev(const std::vector<float> data)
{
  if (data.size() <= 1)
    return 0.0f;

  float mean = Average(data);

  double sum = 0.0;
  for(std::vector<float>::const_iterator elt = data.begin();
      elt != data.end();
      ++elt)
  {
    double v = *elt - mean;
    sum += (v * v);
  }
  return (float)sqrt( sum / (double)data.size() );
}

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMatchedPairIndexOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  // input structures to index;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-input")))
    OEThrow.Fatal("Unable to open input file for reading: %s", itf.Get<std::string>("-input").c_str());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt)
  {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '1':
    chemctxt = OEMatchedPairContext::Bond1;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  case '3':
    chemctxt = OEMatchedPairContext::Bond3;
    break;
  case 'a':
  case 'A':
    chemctxt = OEMatchedPairContext::AllBonds;
    break;
  default:
    OEThrow.Fatal("Invalid context specified:%c, only 0|1|2|3|A allowed", ctxt);
    break;
  }

  bool bPrintTransforms = itf.Get<bool>("-printlist");
  // if a data field was specified, retreive the SD data field name
  std::string field;
  if (itf.Has<std::string>("-datafield"))
    field = itf.Get<std::string>("-datafield");

  // create options class with defaults;
  OEMatchedPairAnalyzerOptions opts;
  // apply requested command line options
  if (!OESetupMatchedPairIndexOptions(opts, itf))
    OEThrow.Fatal("Failure in OESetupMatchedPairIndexOptions!");

  if (!itf.Has<float>("-fragGe") && !itf.Has<float>("-fragLe"))
    OEThrow.Info("Using default indexing range");
  else
    printf("Setting index range=%.1f-%.1f%%\n",
           opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

  if (field.empty() && !bPrintTransforms) {
    OEThrow.Info("Specify -datafield or -printlist, otherwise nothing to do!");
    return 1;
  }

  // create indexing engine;
  OEMatchedPairAnalyzer mmp(opts);

  // add molecules to be indexed;
  bool bFoundData = false;
  OEGraphMol mol;
  int record = 0;
  while (OEReadMolecule(ifs,mol))
  {
    int status = mmp.AddMol(mol,++record);
    if (status != record)
    {
      OEThrow.Info("Error adding input structure to index, record=%d status=%s",
                   record, OEMatchedPairIndexStatusName(status));
    }
    else if (!field.empty() && OEHasSDData(mol,field))
    {
      float fvalue;
      if (OEStringToNumber(OEGetSDData(mol,field),fvalue))
        bFoundData = true;
      else
        OEThrow.Fatal("%d: Non-numeric data for field %s found, %s",
                      record, field.c_str(), OEGetSDData(mol,field).c_str());
    }
  }

  if (mmp.NumMols() == 0)
    OEThrow.Fatal("No records in input structure file were indexed");

  if (!field.empty() && !bFoundData)
    OEThrow.Fatal("No data found for requested field, %s", field.c_str());

  if (mmp.NumMatchedPairs() == 0)
    OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

  // controls how transforms are extracted (direction and allowed properties)
  int extractMode = (OEMatchedPairTransformExtractMode::Sorted +
                     OEMatchedPairTransformExtractMode::NoSMARTS);

  // now walk the transforms from the indexed matched pairs
  std::vector<MMPXform> xforms ;

  int xfmidx = 0;
  for (OEIter<OEMatchedPairTransform> xformitor = OEMatchedPairGetTransforms(mmp,chemctxt,extractMode);
       xformitor;
       ++xformitor)
  {
    const OEMatchedPairTransform &mmpxform = *xformitor;
    ++xfmidx;

    if (bPrintTransforms)
      printf("%2d %s\n", xfmidx, mmpxform.GetTransform().c_str());

    int mmpidx = 0;
    std::vector<float> prop;
    for (OEIter<OEMatchedPair> mmpitor = mmpxform.GetMatchedPairs(); mmpitor; ++mmpitor)
    {
      const OEMatchedPair &mmppair = *mmpitor;

      ++mmpidx;
      char buff[100];
      sprintf(buff,"\t%2d: (%2d,%2d)",mmpidx,mmppair.FromIndex(),mmppair.ToIndex());
      std::string mmpinfo = buff;

      for (OEIter<const std::string> tagitor = mmppair.GetDataTags(); tagitor; ++tagitor)
      {
        const std::string &tag = *tagitor;
        mmpinfo = mmpinfo + std::string(" " + tag + "=(" +  mmppair.GetFromSDData(tag) + "," + mmppair.GetToSDData(tag) + ")");

        if (tag.compare(field) == 0) {
          float fromValue;
          if (!OEStringToNumber(mmppair.GetFromSDData(tag),fromValue))
            fromValue = 0.0;
          float toValue;
          if (!OEStringToNumber(mmppair.GetToSDData(tag),toValue))
            toValue = 0.0;
          prop.push_back(toValue - fromValue);
        }

      }
      if (bPrintTransforms)
        printf("%s\n",mmpinfo.c_str());
    }
    // skip if property not found
    if (!prop.empty()) {
      // add
      xforms.push_back(MMPXform(mmpxform, (float)Average(prop), (float)StdDev(prop), (int)prop.size()));
    }
  }

  if (field.empty())
    return 0; // done

  if (xforms.empty())
    OEThrow.Error("No matched pairs found with %s data",field.c_str());

  // sort the transforms by largest absolute delta property value
  std::sort(xforms.begin(), xforms.end());

  printf("\n*** Transforms sorted by delta %s\n", field.c_str());

  int idx = 0;
  for (std::vector<MMPXform>::iterator xfmitor = xforms.begin();
       xfmitor != xforms.end();
       ++xfmitor)
  {
    MMPXform &xfm = *xfmitor;

    ++idx;
    if ((extractMode & OEMatchedPairTransformExtractMode::NoSMARTS) != 0) {
      // not 'invertable' if SMARTS qualifiers were applied
      if (xfm.Average() < 0.0) {
        xfm.SetAverage(-1.0f * xfm.Average());
        xfm.GetXform().Invert();
      }
    }
    printf("%2d %s=(avg=%.2f,stdev=%.2f,num=%d) %s\n",idx,
           field.c_str(),
           xfm.Average(),
           xfm.StdDev(),
           xfm.Num(),
           xfm.Xform().GetTransform().c_str());
  }
  return 0;
}

Apply ChEMBL solubility transformations

//*****************************************************************************
//* Copyright (C) 2014 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to apply ChEMBL18 solubility transforms to an input set of structures
//* ---------------------------------------------------------------------------
//* ChEMBLsolubility [-i] input_mols [-o] output_mols [-verbose] [-context #] [-minpairs #]
//*
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//* [-verbose]: optional flag to request verbose progress
//* [-context #]: optional flag to request a specific chemistry context
//* [-minpairs #]: optional flag to request a minimum number of pairs to apply transforms
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "ChEMBLsolubility.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  bool verbose = itf.Get<bool>("-verbose");

  // input structure(s) to transform;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open file for reading: %s",itf.Get<std::string>("-i").c_str());

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open file for writing: %s",itf.Get<std::string>("-o").c_str());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt) {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  default:
    OEThrow.Fatal("Invalid context specified: %s, only 0|2 allowed",askcontext.c_str());
    break;
  }

  unsigned int minpairs = itf.Get<unsigned int>("-minpairs");
  if (minpairs > 1u)
    OEThrow.Info("Requiring at least %d matched pairs to apply transformations", minpairs);

  int irec = 0;
  int ocnt = 0;
  int ototal = 0;
  OEGraphMol mol;
  while (OEReadMolecule(ifs,mol))
  {
    ++irec;
    OETheFunctionFormerlyKnownAsStripSalts(mol);

    ocnt = 0;
    for (OEIter<OEMolBase> outmol = OEApplyChEMBL18SolubilityTransforms(mol, chemctxt, minpairs); outmol; ++outmol)
    {
      ++ocnt;
      OEWriteMolecule(ofs, outmol);
    }
    if (ocnt == 0)
    {
      std::string name = mol.GetTitle();
      if (name.length() == 0)
        name = "Record " + OENumberToString(irec);
      printf("%s: did not produce any output", name.c_str());
      printf("%s\n", OEMolToSmiles(mol).c_str());
    }
    else
    {
      ototal += ocnt;
      if (verbose)
        printf("Record: %d transformation count=%d total mols=%d\n",
               irec, ocnt, ototal);
    }
  }

  if (irec == 0)
    OEThrow.Fatal("No records in input structure file to transform");

  if (ototal == 0)
    OEThrow.Warning("No transformed structures generated");
  else
    printf("Input molecules=%d Output molecules=%d", irec, ototal);

  return ((!ototal) ? 1 : 0);
}