OEMedChem Examples

Bemis Murcko perception

/*
(C) 2017 OpenEye Scientific Software Inc. All rights reserved.

TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
provided to current licensees or subscribers of OpenEye products or
SaaS offerings (each a "Customer").
Customer is hereby permitted to use, copy, and modify the Sample Code,
subject to these terms. OpenEye claims no rights to Customer's
modifications. Modification of Sample Code is at Customer's sole and
exclusive risk. Sample Code may require Customer to have a then
current license or subscription to the applicable OpenEye offering.
THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
liable for any damages or liability in connection with the Sample Code
or its use.
*/
//*****************************************************************************
//* Utility to fragment the input structures by Bemis-Murcko rules
//* ---------------------------------------------------------------------------
//* BemisMurckoPerception.py [ -uncolor ] [-i] <input_mols> [-o] <output_mols>
//*
//* input_mols: filename of molecules to fragment and uncolor
//* output_mols: filename of input structure with SDData of perceived regions
//* [ -uncolor ]: optional flag to request uncoloring of output fragment info
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "BemisMurckoPerception.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  // optional uncolor flag
  const bool bUncolor = itf.Get<bool>("-uncolor");

  // input structure(s) to transform;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open file for reading: %s",
                  itf.Get<std::string>("-i").c_str());

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open file for writing: %s",
                  itf.Get<std::string>("-o").c_str());

  if (!OEIsSDDataFormat(ofs.GetFormat()))
    OEThrow.Fatal("Output file format does not support SD data: %s",
                  itf.Get<std::string>("-o").c_str());

  int irec = 0;
  int ototal = 0;
  OEGraphMol mol, frag;
  while (OEReadMolecule(ifs,mol))
  {
    ++irec;
    OEDeleteEverythingExceptTheFirstLargestComponent(mol);

    int regions = 0;
    for (OEIter<OEAtomBondSet> absets = OEGetBemisMurcko(mol); absets; ++absets)
    {
      OEAtomBondSet &abset = absets;
      ++regions;
      // create a fragment from the perceived region;
      OESubsetMol(frag, mol, abset, true);
      if (bUncolor)
      {
        // ignore 3D stereo parities
        if (frag.GetDimension() == 3)
          frag.SetDimension(0);
        // uncolor the fragment;
        OEUncolorMol(frag);
      }
      std::string smi = OEMolToSmiles(frag);
      // annotate the input molecule with the role information;
      for (OEIter<const OERole> role = abset.GetRoles(); role; ++role)
        OEAddSDData(mol, role->GetName(), smi);

    }
    if (regions == 0)
    {
      std::string name = mol.GetTitle();
      if (name.length() == 0)
        name = "Record " + OENumberToString(irec);
      OEThrow.Warning(name + ": no perceived regions");
    }
    else
    {
      ++ototal;
      OEWriteMolecule(ofs, mol);
    }
  }
  if (irec == 0)
    OEThrow.Fatal("No records in input structure file to perceive");

  if (ototal ==0)
    OEThrow.Warning("No annotated structures generated");

  printf("Input molecules=%d, output annotated %smolecules=%d",
         irec, ((bUncolor) ? "(uncolored) " : ""), ototal);

  return 0;
}

See also

Matched Pair analysis and transformations

/*
(C) 2017 OpenEye Scientific Software Inc. All rights reserved.

TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
provided to current licensees or subscribers of OpenEye products or
SaaS offerings (each a "Customer").
Customer is hereby permitted to use, copy, and modify the Sample Code,
subject to these terms. OpenEye claims no rights to Customer's
modifications. Modification of Sample Code is at Customer's sole and
exclusive risk. Sample Code may require Customer to have a then
current license or subscription to the applicable OpenEye offering.
THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
liable for any damages or liability in connection with the Sample Code
or its use.
*/
//*****************************************************************************
//* Utility to perform a matched pair analysis on a set of structures
//*  and use the transformations discovered to alter a second set of structures
//* ---------------------------------------------------------------------------
//* MatchedPairTransform [-index] index_mols [ -i ] input_mols [ -o ] output_mols
//*                      [-fragGe min%] [-fragLe max%] [-context 0|1|2|3|A] [-minpairs #]
//*
//* index_mols: filename of input molecules to analyze
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MatchedPairTransform.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMatchedPairIndexOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  // input structures to index;
  oemolistream ifsindex;
  if (!ifsindex.open(itf.Get<std::string>("-index")))
    OEThrow.Fatal("Unable to open index file for reading: " + itf.Get<std::string>("-index"));

  // input structure(s) to transform;
  oemolistream ifsmols;
  if (!ifsmols.open(itf.Get<std::string>("-input")))
    OEThrow.Fatal("Unable to open input file for reading: " + itf.Get<std::string>("-input"));

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-output")))
    OEThrow.Fatal("Unable to open output file for writing: " + itf.Get<std::string>("-output"));

  // create options class with defaults;
  OEMatchedPairAnalyzerOptions opts;
  // apply requested command line options
  if (!OESetupMatchedPairIndexOptions(opts, itf))
    OEThrow.Fatal("Failure in OESetupMatchedPairIndexOptions!");

  if (!opts.HasIndexableFragmentHeavyAtomRange())
    OEThrow.Info("Indexing all fragments");
  else
    printf("Using index range=%.1f-%.1f%%\n",
           opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt)
  {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '1':
    chemctxt = OEMatchedPairContext::Bond1;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  case '3':
    chemctxt = OEMatchedPairContext::Bond3;
    break;
  case 'a':
  case 'A':
    chemctxt = OEMatchedPairContext::AllBonds;
    break;
  default:
    OEThrow.Fatal("Invalid context specified:%c, only 0|1|2|3|A allowed",ctxt);
    break;
  }

  // create indexing engine;
  OEMatchedPairAnalyzer mmp(opts);

  bool verbose = itf.Get<bool>("-verbose");

  // add molecules to be indexed;
  int record = 0;
  OEGraphMol mol;
  unsigned int irec = 0;
  while (OEReadMolecule(ifsindex,mol))
  {
    ++irec;
    int status = mmp.AddMol(mol,++record);
    if (verbose && status != record)
      OEThrow.Info("Input structure not added to index, record=%d status=%s",
                   record, OEMatchedPairIndexStatusName(status));
  }

  if (irec == 0)
    OEThrow.Fatal("No records in input structure file for indexing");

  if (!mmp.NumMatchedPairs())
    OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend indexable range");

  // return some status information;
  printf("indexed molecules=%d matched pairs=%d\n",
         mmp.NumMols(),mmp.NumMatchedPairs());

  unsigned int minpairs = itf.Get<unsigned int>("-minpairs");
  if (minpairs > 1u)
    OEThrow.Info("Requiring at least %d matched pairs to apply transformations", minpairs);

  int orec = 0;
  int ocnt = 0;
  int ototal = 0;
  while(OEReadMolecule(ifsmols,mol))
  {
    ++orec ;
    ocnt = 0;
    for (OEIter<OEMolBase> outmol = OEMatchedPairApplyTransforms(mol, mmp, chemctxt, minpairs); outmol; ++outmol)
    {
      ++ocnt ;
      OEWriteMolecule(ofs, outmol);
    }
    ototal += ocnt ;
    if (verbose && ocnt == 0)
    {
      // as minpairs increases, fewer transformed mols are generated - output if requested
      std::string name = mol.GetTitle();
      if (name.empty())
        name = "Record " + OENumberToString(orec);
      OEThrow.Info("%s: did not produce any output", name.c_str());
    }
  }
  if (orec == 0)
    OEThrow.Fatal("No records in input structure file to transform");

  if (ototal == 0)
    OEThrow.Fatal("No transformed structures generated");

  printf("Input molecules=%d Output molecules=%d\n", orec, ototal);
  return ((!ototal) ? 1 : 0);
}

Matched Pair analysis and listing of transformations

/*
(C) 2017 OpenEye Scientific Software Inc. All rights reserved.

TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
provided to current licensees or subscribers of OpenEye products or
SaaS offerings (each a "Customer").
Customer is hereby permitted to use, copy, and modify the Sample Code,
subject to these terms. OpenEye claims no rights to Customer's
modifications. Modification of Sample Code is at Customer's sole and
exclusive risk. Sample Code may require Customer to have a then
current license or subscription to the applicable OpenEye offering.
THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
liable for any damages or liability in connection with the Sample Code
or its use.
*/
//*****************************************************************************
// * Utility to perform a matched pair analysis on a set of structures
// *  and dump a listing of the transformations derived from matched pairs found
// * ---------------------------------------------------------------------------
// * MatchedPairTransformList index_mols
// *
// * index_mols: filename of input molecules to analyze
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MatchedPairTransformList.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

// simple public class to collect/compute stats on matched pairs
class MMPXform
{
public:
  MMPXform(OEMatchedPairTransform xfm,
                        float average, float stddev, int count)
  {
    _xform = xfm;
    _avg = average;
    _std = stddev;
    _num = count;
  }
  MMPXform(const MMPXform &rhs)
  {
    *this = rhs;
  }
  MMPXform& operator=(const MMPXform &rhs)
  {
    if (this == &rhs)
      return *this;

    _xform = rhs._xform;
    _avg = rhs._avg;
    _std = rhs._std;
    _num = rhs._num;
    return *this;
  }

  // defined so we can use std::sort
  bool operator<(const MMPXform& cmp) const
  {
    float lAvg = ((_avg < 0.) ? -(_avg) : _avg);
    float rAvg = ((cmp._avg < 0.) ? -(cmp._avg) : cmp._avg);
    return (lAvg > rAvg); // note: largest->smallest
  }
  const OEMatchedPairTransform& Xform() const { return _xform; }
  OEMatchedPairTransform& GetXform()          { return _xform; }
  float Average() const                       { return _avg;   }
  bool SetAverage(float avg)                  { _avg = avg; return true; }
  float StdDev() const                        { return _std;   }
  int Num() const                             { return _num;   }

  OEMatchedPairTransform _xform;
  float _avg;
  float _std;
  int   _num;
};

// compute some simple statistics for ranking
static float Average(const std::vector<float> data)
{
  if (data.empty())
    return 0.0f;
  if (data.size() == 1)
    return data[0];

  float avg = 0.0f;
  for(std::vector<float>::const_iterator elt = data.begin();
      elt != data.end();
      ++elt)
    avg += *elt;
  return (avg / float(data.size()));
}
static float StdDev(const std::vector<float> data)
{
  if (data.size() <= 1)
    return 0.0f;

  float mean = Average(data);

  double sum = 0.0;
  for(std::vector<float>::const_iterator elt = data.begin();
      elt != data.end();
      ++elt)
  {
    double v = *elt - mean;
    sum += (v * v);
  }
  return (float)sqrt( sum / (double)data.size() );
}

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMatchedPairIndexOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  // input structures to index;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-input")))
    OEThrow.Fatal("Unable to open input file for reading: %s", itf.Get<std::string>("-input").c_str());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt)
  {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '1':
    chemctxt = OEMatchedPairContext::Bond1;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  case '3':
    chemctxt = OEMatchedPairContext::Bond3;
    break;
  case 'a':
  case 'A':
    chemctxt = OEMatchedPairContext::AllBonds;
    break;
  default:
    OEThrow.Fatal("Invalid context specified:%c, only 0|1|2|3|A allowed", ctxt);
    break;
  }

  bool bPrintTransforms = itf.Get<bool>("-printlist");
  // if a data field was specified, retreive the SD data field name
  std::string field;
  if (itf.Has<std::string>("-datafield"))
    field = itf.Get<std::string>("-datafield");

  // create options class with defaults;
  OEMatchedPairAnalyzerOptions opts;
  // apply requested command line options
  if (!OESetupMatchedPairIndexOptions(opts, itf))
    OEThrow.Fatal("Failure in OESetupMatchedPairIndexOptions!");

  if (!opts.HasIndexableFragmentHeavyAtomRange())
    OEThrow.Info("Indexing all fragments");
  else
    printf("Using index range=%.1f-%.1f%%\n",
           opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

  if (field.empty() && !bPrintTransforms) {
    OEThrow.Info("Specify -datafield or -printlist, otherwise nothing to do!");
    return 1;
  }

  // create indexing engine;
  OEMatchedPairAnalyzer mmp(opts);

  // add molecules to be indexed;
  bool bFoundData = false;
  OEGraphMol mol;
  int record = 0;
  while (OEReadMolecule(ifs,mol))
  {
    int status = mmp.AddMol(mol,++record);
    if (status != record)
    {
      OEThrow.Info("Error adding input structure to index, record=%d status=%s",
                   record, OEMatchedPairIndexStatusName(status));
    }
    else if (!field.empty() && OEHasSDData(mol,field))
    {
      float fvalue;
      if (OEStringToNumber(OEGetSDData(mol,field),fvalue))
        bFoundData = true;
      else
        OEThrow.Fatal("%d: Non-numeric data for field %s found, %s",
                      record, field.c_str(), OEGetSDData(mol,field).c_str());
    }
  }

  if (mmp.NumMols() == 0)
    OEThrow.Fatal("No records in input structure file were indexed");

  if (!field.empty() && !bFoundData)
    OEThrow.Fatal("No data found for requested field, %s", field.c_str());

  if (mmp.NumMatchedPairs() == 0)
    OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

  // controls how transforms are extracted (direction and allowed properties)
  int extractMode = (OEMatchedPairTransformExtractMode::Sorted +
                     OEMatchedPairTransformExtractMode::NoSMARTS);

  // now walk the transforms from the indexed matched pairs
  std::vector<MMPXform> xforms ;

  int xfmidx = 0;
  for (OEIter<OEMatchedPairTransform> xformitor = OEMatchedPairGetTransforms(mmp,chemctxt,extractMode);
       xformitor;
       ++xformitor)
  {
    const OEMatchedPairTransform &mmpxform = *xformitor;
    ++xfmidx;

    if (bPrintTransforms)
      printf("%2d %s\n", xfmidx, mmpxform.GetTransform().c_str());

    int mmpidx = 0;
    std::vector<float> prop;
    for (OEIter<OEMatchedPair> mmpitor = mmpxform.GetMatchedPairs(); mmpitor; ++mmpitor)
    {
      const OEMatchedPair &mmppair = *mmpitor;

      ++mmpidx;
      char buff[100];
      sprintf(buff,"\t%2d: (%2d,%2d)",mmpidx,mmppair.FromIndex(),mmppair.ToIndex());
      std::string mmpinfo = buff;

      for (OEIter<const std::string> tagitor = mmppair.GetDataTags(); tagitor; ++tagitor)
      {
        const std::string &tag = *tagitor;
        mmpinfo = mmpinfo + std::string(" " + tag + "=(" +  mmppair.GetFromSDData(tag) + "," + mmppair.GetToSDData(tag) + ")");

        if (tag.compare(field) == 0) {
          float fromValue;
          if (!OEStringToNumber(mmppair.GetFromSDData(tag),fromValue))
            fromValue = 0.0;
          float toValue;
          if (!OEStringToNumber(mmppair.GetToSDData(tag),toValue))
            toValue = 0.0;
          prop.push_back(toValue - fromValue);
        }

      }
      if (bPrintTransforms)
        printf("%s\n",mmpinfo.c_str());
    }
    // skip if property not found
    if (!prop.empty()) {
      // add
      xforms.push_back(MMPXform(mmpxform, (float)Average(prop), (float)StdDev(prop), (int)prop.size()));
    }
  }

  if (field.empty())
    return 0; // done

  if (xforms.empty())
    OEThrow.Error("No matched pairs found with %s data",field.c_str());

  // sort the transforms by largest absolute delta property value
  std::sort(xforms.begin(), xforms.end());

  printf("\n*** Transforms sorted by delta %s\n", field.c_str());

  int idx = 0;
  for (std::vector<MMPXform>::iterator xfmitor = xforms.begin();
       xfmitor != xforms.end();
       ++xfmitor)
  {
    MMPXform &xfm = *xfmitor;

    ++idx;
    if ((extractMode & OEMatchedPairTransformExtractMode::NoSMARTS) != 0) {
      // not 'invertable' if SMARTS qualifiers were applied
      if (xfm.Average() < 0.0) {
        xfm.SetAverage(-1.0f * xfm.Average());
        xfm.GetXform().Invert();
      }
    }
    printf("%2d %s=(avg=%.2f,stdev=%.2f,num=%d) %s\n",idx,
           field.c_str(),
           xfm.Average(),
           xfm.StdDev(),
           xfm.Num(),
           xfm.Xform().GetTransform().c_str());
  }
  return 0;
}

Apply ChEMBL solubility transformations

/*
(C) 2017 OpenEye Scientific Software Inc. All rights reserved.

TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
provided to current licensees or subscribers of OpenEye products or
SaaS offerings (each a "Customer").
Customer is hereby permitted to use, copy, and modify the Sample Code,
subject to these terms. OpenEye claims no rights to Customer's
modifications. Modification of Sample Code is at Customer's sole and
exclusive risk. Sample Code may require Customer to have a then
current license or subscription to the applicable OpenEye offering.
THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
liable for any damages or liability in connection with the Sample Code
or its use.
*/
//*****************************************************************************
//* Utility to apply ChEMBL18 solubility transforms to an input set of structures
//* ---------------------------------------------------------------------------
//* ChEMBLsolubility [-i] input_mols [-o] output_mols [-verbose] [-context #] [-minpairs #]
//*
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//* [-verbose]: optional flag to request verbose progress
//* [-context #]: optional flag to request a specific chemistry context
//* [-minpairs #]: optional flag to request a minimum number of pairs to apply transforms
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "ChEMBLsolubility.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  bool verbose = itf.Get<bool>("-verbose");

  // input structure(s) to transform;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open file for reading: %s",itf.Get<std::string>("-i").c_str());

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open file for writing: %s",itf.Get<std::string>("-o").c_str());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt) {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  default:
    OEThrow.Fatal("Invalid context specified: %s, only 0|2 allowed",askcontext.c_str());
    break;
  }

  unsigned int minpairs = itf.Get<unsigned int>("-minpairs");
  if (minpairs > 1u)
    OEThrow.Info("Requiring at least %d matched pairs to apply transformations", minpairs);

  int irec = 0;
  int ocnt = 0;
  int ototal = 0;
  OEGraphMol mol;
  while (OEReadMolecule(ifs,mol))
  {
    ++irec;
    OEDeleteEverythingExceptTheFirstLargestComponent(mol);

    ocnt = 0;
    for (OEIter<OEMolBase> outmol = OEApplyChEMBL18SolubilityTransforms(mol, chemctxt, minpairs); outmol; ++outmol)
    {
      ++ocnt;
      OEWriteMolecule(ofs, outmol);
    }
    if (ocnt == 0)
    {
      std::string name = mol.GetTitle();
      if (name.length() == 0)
        name = "Record " + OENumberToString(irec);
      printf("%s: did not produce any output", name.c_str());
      printf("%s\n", OEMolToSmiles(mol).c_str());
    }
    else
    {
      ototal += ocnt;
      if (verbose)
        printf("Record: %d transformation count=%d total mols=%d\n",
               irec, ocnt, ototal);
    }
  }

  if (irec == 0)
    OEThrow.Fatal("No records in input structure file to transform");

  if (ototal == 0)
    OEThrow.Warning("No transformed structures generated");
  else
    printf("Input molecules=%d Output molecules=%d", irec, ototal);

  return ((!ototal) ? 1 : 0);
}

MCS Similarity score reporting

/*
(C) 2017 OpenEye Scientific Software Inc. All rights reserved.

TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
provided to current licensees or subscribers of OpenEye products or
SaaS offerings (each a "Customer").
Customer is hereby permitted to use, copy, and modify the Sample Code,
subject to these terms. OpenEye claims no rights to Customer's
modifications. Modification of Sample Code is at Customer's sole and
exclusive risk. Sample Code may require Customer to have a then
current license or subscription to the applicable OpenEye offering.
THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
liable for any damages or liability in connection with the Sample Code
or its use.
*/
//*****************************************************************************
//* Utility to index a set of structures and return the highest MCS similarity
//*  structures from an input query structure
//* ---------------------------------------------------------------------------
//* MCSFragDatabase [ -inmols index_mols | -indb index.mcsfrag ]
//*                 [ -query query_file ]
//*                 [ -type bond -limit 10 ]    // 10 Tanimoto bond similarity scores
//*                 [ -verbose 1 ]              // optional verbosity
//*                 [ -outdb outindex.mcsfrag ] // optional save index file
//*
//* index_mols: filename of input molecules to analyze
//* input.mcsfrag: filename of index file to load
//* query_file: filename of query to report MCS similarity results
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MCSFragDatabase.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMCSFragDatabaseOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  OEMCSFragDatabaseOptions mcsopt;
  if (!OESetupMCSFragDatabaseOptions(mcsopt, itf))
    OEThrow.Fatal("Error setting MCS fragment database options");

  // either:
  //   1) input structures to index
  //   2) MCS fragment database to load
  std::string indb;
  std::string outdb;
  std::string inmols;

  bool verbose = itf.Get<bool>("-verbose");

  if (!itf.Has<std::string>("-inmols") && !itf.Has<std::string>("-indb"))
    OEThrow.Fatal("Must specify either -inmols or -indb");
  else if (itf.Has<std::string>("-inmols") && itf.Has<std::string>("-indb"))
    OEThrow.Fatal("Must specify only one of -inmols or -indb");
  else if (itf.Has<std::string>("-inmols") && !itf.Has<std::string>("-outdb") && !itf.Has<std::string>("-query"))
    OEThrow.Fatal("Must specify one of -outdb or -query with -inmols");
  else if (itf.Has<std::string>("-indb") && !itf.Has<std::string>("-query"))
    OEThrow.Fatal("Must specify -query with -indb");

  if (itf.Has<std::string>("-outdb"))
  {
    outdb = itf.Get<std::string>("-outdb");
    if (!OEIsMCSFragDatabaseFiletype(outdb))
      OEThrow.Fatal("Option -outdb must specify a .mcsfrag filename: " + outdb);
  }

  // initialize the MCS fragment index
  OEMCSFragDatabase mcsdb(mcsopt);

  oemolistream ifsindex;
  if (itf.Has<std::string>("-indb"))
  {
    indb = itf.Get<std::string>("-indb");
    if (!OEIsMCSFragDatabaseFiletype(indb))
      OEThrow.Fatal("Option -indb must specify a .mcsfrag filename: " + indb);
    if (verbose)
      OEThrow.Info("Loading index from " + indb);
    if (!OEReadMCSFragDatabase(indb, mcsdb))
      OEThrow.Fatal("Error deserializing MCS fragment database: " + indb);
    if (itf.Has<std::string>("-outdb"))
    {
      OEThrow.Warning("Option -outdb only meaningful with -inmols, ignoring");
      outdb.clear();
    }
    if (!mcsdb.NumMols())
      OEThrow.Fatal("Loaded empty index");

    // index instantiated - drop through to results
  }
  else if (itf.Has<std::string>("-inmols"))
  {
    inmols = itf.Get<std::string>("-inmols");
    if (!ifsindex.open(inmols))
      OEThrow.Fatal("Unable to open input file for indexing: " + inmols);
  }

  std::string queryfile;
  oemolistream ifsquery;
  if (itf.Has<std::string>("-query"))
  {
    queryfile = itf.Get<std::string>("-query");
    if (!ifsquery.open(queryfile))
      OEThrow.Fatal("Unable to open query file: " + queryfile);
  }

  if (!inmols.empty())
  {
    if (!mcsopt.HasIndexableFragmentHeavyAtomRange())
      OEThrow.Info("Indexing all fragments");
    else
      printf("Using index range=%.1f-%.1f%%\n",
             mcsopt.GetIndexableFragmentRangeMin(), mcsopt.GetIndexableFragmentRangeMax());

    if ((mcsopt.GetOptions() & OEMatchedPairOptions::AllCuts) != OEMatchedPairOptions::AllCuts)
      printf("Using nondefault cut option=%0x\n", (mcsopt.GetOptions() & OEMatchedPairOptions::AllCuts));
    if ((mcsopt.GetOptions() & OEMatchedPairOptions::IndexHSites) == 0)
      printf("Using nondefault HMember off option\n");

    // add molecules to be indexed;
    unsigned int maxrec = 0;
    if (itf.Has<unsigned int>("-maxrec"))
      maxrec = itf.Get<unsigned int>("-maxrec");
    unsigned int vstatus = ( verbose ? 1000 : 0 );

    int record = 0;
    OEGraphMol mol;
    unsigned int indexed = 0;
    while (OEReadMolecule(ifsindex,mol))
    {
      ++record;
      int status = mcsdb.AddMol(mol,record);
      if (status == record)
          ++indexed;
      else if (verbose)
        OEThrow.Info("Input structure not added to index, record=%d status=%s",
                     record, OEMatchedPairIndexStatusName(status));
      if (maxrec && (unsigned)record >= maxrec)
        break;  // maximum record limit reached
      if (vstatus && (record % vstatus) == 0)
        printf("%d records processed, %ud structures indexed\n", record, indexed);
    }

    if (record == 0)
      OEThrow.Fatal("No records in input structure file for indexing");

    // indexing complete - drop through to results
  }

  if (!mcsdb.NumFragments())
    OEThrow.Fatal("No fragments found from indexing, use -fragGe,-fragLe options to extend indexable range");

  if (!outdb.empty())
  {
    if (verbose)
      OEThrow.Info("Writing index to " + outdb);
    if (!OEWriteMCSFragDatabase(outdb, mcsdb))
      OEThrow.Fatal("Error serializing MCS fragment database: " + outdb);
  }

  // return some status information about the index
  printf("indexed molecules=%d fragments=%d\n",
         mcsdb.NumMols(),mcsdb.NumFragments());

  // check for (optional) query
  if (queryfile.empty())
    return 0;

  // process the query options
  unsigned int qmaxrec = 0;
  if (itf.Has<unsigned int>("-qlim"))
    qmaxrec = itf.Get<unsigned int>("-qlim");

  unsigned int numscores = 10;
  if (itf.Has<unsigned int>("-limit"))
    numscores = itf.Get<unsigned int>("-limit");

  unsigned int cnttype = OEMCSScoreType::BondCount;
  if (itf.Has<std::string>("-type"))
  {
    if (itf.Get<std::string>("-type").find("atom") != std::string::npos)
      cnttype = OEMCSScoreType::AtomCount;
    else if (itf.Get<std::string>("-type").find("bond") != std::string::npos)
      cnttype = OEMCSScoreType::BondCount;
    else
      OEThrow.Warning("Ignoring unrecognized -type option, expecting atom or bond: " + itf.Get<std::string>("-type"));
  }

  // process the queries
  OEGraphMol qmol;
  unsigned int qnum = 0;
  while (OEReadMolecule(ifsquery, qmol))
  {
    ++qnum;

    unsigned int numhits = 0u;
    for (OEIter<OEMedChem::OEMCSMolSimScore> scores = mcsdb.GetSortedScores(qmol, numscores, 0, 0, true, cnttype);
         scores;
         ++scores, ++numhits)
    {
      OEMedChem::OEMCSMolSimScore &score = scores;

      if (!numhits)
        printf("Query: %d: %s\n", qnum, qmol.GetTitle());

      printf("\trecord: %6d\ttanimoto_%s_score: %.2f\tmcs_core: %s\n",
             score.GetIdx(), ((cnttype == OEMCSScoreType::BondCount) ? "bond" : "atom" ), score.GetScore(), score.GetMCSCore().c_str());
    }
    if (!numhits)
      OEThrow.Warning("Query: %d: %s - no hits\n", qnum, qmol.GetTitle());

    if (qmaxrec && qnum >= qmaxrec)
      break;
  }
  if (!qnum)
    OEThrow.Fatal("Error reading query structure(s): " + queryfile);

  return 0;
}