OEMedChem Examples

Bemis Murcko perception

//*****************************************************************************
//* Copyright (C) 2014 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to fragment the input structures by Bemis-Murcko rules
//* ---------------------------------------------------------------------------
//* BemisMurckoPerception -i input_mols -o output_mols [-uncolor]
//*
//* input_mols: filename of molecules to fragment and uncolor
//* output_mols: filename of input structure with SDData of perceived regions
//* [-uncolor]: optional flag to request uncoloring of output fragment info
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "BemisMurckoPerception.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  // optional uncolor flag
  const bool bUncolor = itf.Get<bool>("-uncolor");

  // input structure(s) to transform;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open file for reading: %s",
                  itf.Get<std::string>("-i").c_str());

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open file for writing: %s",
                  itf.Get<std::string>("-o").c_str());

  if (!OEIsSDDataFormat(ofs.GetFormat()))
    OEThrow.Fatal("Output file format does not support SD data: %s",
                  itf.Get<std::string>("-o").c_str());

  int irec = 0;
  int ototal = 0;
  OEGraphMol mol, frag;
  while (OEReadMolecule(ifs,mol))
  {
    ++irec;
    OETheFunctionFormerlyKnownAsStripSalts(mol);

    int regions = 0;
    for (OEIter<OEAtomBondSet> absets = OEGetBemisMurcko(mol); absets; ++absets)
    {
      OEAtomBondSet &abset = absets;
      ++regions;
      // create a fragment from the perceived region;
      OESubsetMol(frag, mol, abset, true);
      if (bUncolor)
      {
        // uncolor the fragment;
        OEUncolorMol(frag);
      }
      std::string smi = OEMolToSmiles(frag);
      // annotate the input molecule with the role information;
      for (OEIter<const OERole> role = abset.GetRoles(); role; ++role)
        OEAddSDData(mol, role->GetName(), smi);

    }
    if (regions == 0)
    {
      std::string name = mol.GetTitle();
      if (name.length() == 0)
        name = "Record " + OENumberToString(irec);
      OEThrow.Warning(name + ": no perceived regions");
    }
    else
    {
      ++ototal;
      OEWriteMolecule(ofs, mol);
    }
  }
  if (irec == 0)
    OEThrow.Fatal("No records in input structure file to perceive");

  if (ototal ==0)
    OEThrow.Warning("No annotated structures generated");

  printf("Input molecules=%d, output annotated %smolecules=%d",
         irec, ((bUncolor) ? "(uncolored) " : ""), ototal);

  return 0;
}

See also

Matched Pair analysis and transformations

//*****************************************************************************
//* Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to perform a matched pair analysis on a set of structures
//*  and use the transformations discovered to alter a second set of structures
//* ---------------------------------------------------------------------------
//* MatchedPairTransform [-index] index_mols [ -i ] input_mols [ -o ] output_mols
//*                      [-fragGe min%] [-fragLe max%] [-context 0|1|2|3|A] [-minpairs #]
//*
//* index_mols: filename of input molecules to analyze
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MatchedPairTransform.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMatchedPairIndexOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  // input structures to index;
  oemolistream ifsindex;
  if (!ifsindex.open(itf.Get<std::string>("-index")))
    OEThrow.Fatal("Unable to open index file for reading: " + itf.Get<std::string>("-index"));

  // input structure(s) to transform;
  oemolistream ifsmols;
  if (!ifsmols.open(itf.Get<std::string>("-input")))
    OEThrow.Fatal("Unable to open input file for reading: " + itf.Get<std::string>("-input"));

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-output")))
    OEThrow.Fatal("Unable to open output file for writing: " + itf.Get<std::string>("-output"));

  // create options class with defaults;
  OEMatchedPairAnalyzerOptions opts;
  // apply requested command line options
  if (!OESetupMatchedPairIndexOptions(opts, itf))
    OEThrow.Fatal("Failure in OESetupMatchedPairIndexOptions!");

  if (!opts.HasIndexableFragmentHeavyAtomRange())
    OEThrow.Info("Indexing all fragments");
  else
    printf("Using index range=%.1f-%.1f%%\n",
           opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt)
  {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '1':
    chemctxt = OEMatchedPairContext::Bond1;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  case '3':
    chemctxt = OEMatchedPairContext::Bond3;
    break;
  case 'a':
  case 'A':
    chemctxt = OEMatchedPairContext::AllBonds;
    break;
  default:
    OEThrow.Fatal("Invalid context specified:%c, only 0|1|2|3|A allowed",ctxt);
    break;
  }

  // create indexing engine;
  OEMatchedPairAnalyzer mmp(opts);

  bool verbose = itf.Get<bool>("-verbose");

  // add molecules to be indexed;
  int record = 0;
  OEGraphMol mol;
  unsigned int irec = 0;
  while (OEReadMolecule(ifsindex,mol))
  {
    ++irec;
    int status = mmp.AddMol(mol,++record);
    if (verbose && status != record)
      OEThrow.Info("Input structure not added to index, record=%d status=%s",
                   record, OEMatchedPairIndexStatusName(status));
  }

  if (irec == 0)
    OEThrow.Fatal("No records in input structure file for indexing");

  if (!mmp.NumMatchedPairs())
    OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend indexable range");

  // return some status information;
  printf("indexed molecules=%d matched pairs=%d\n",
         mmp.NumMols(),mmp.NumMatchedPairs());

  unsigned int minpairs = itf.Get<unsigned int>("-minpairs");
  if (minpairs > 1u)
    OEThrow.Info("Requiring at least %d matched pairs to apply transformations", minpairs);

  int orec = 0;
  int ocnt = 0;
  int ototal = 0;
  while(OEReadMolecule(ifsmols,mol))
  {
    ++orec ;
    ocnt = 0;
    for (OEIter<OEMolBase> outmol = OEMatchedPairApplyTransforms(mol, mmp, chemctxt, minpairs); outmol; ++outmol)
    {
      ++ocnt ;
      OEWriteMolecule(ofs, outmol);
    }
    ototal += ocnt ;
    if (verbose && ocnt == 0)
    {
      // as minpairs increases, fewer transformed mols are generated - output if requested
      std::string name = mol.GetTitle();
      if (name.empty())
        name = "Record " + OENumberToString(orec);
      OEThrow.Info("%s: did not produce any output", name.c_str());
    }
  }
  if (orec == 0)
    OEThrow.Fatal("No records in input structure file to transform");

  if (ototal == 0)
    OEThrow.Fatal("No transformed structures generated");

  printf("Input molecules=%d Output molecules=%d\n", orec, ototal);
  return ((!ototal) ? 1 : 0);
}

Matched Pair analysis and listing of transformations

//*****************************************************************************
//* Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
//*****************************************************************************
// * Utility to perform a matched pair analysis on a set of structures
// *  and dump a listing of the transformations derived from matched pairs found
// * ---------------------------------------------------------------------------
// * MatchedPairTransformList index_mols
// *
// * index_mols: filename of input molecules to analyze
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MatchedPairTransformList.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

// simple public class to collect/compute stats on matched pairs
class MMPXform
{
public:
  MMPXform(OEMatchedPairTransform xfm,
                        float average, float stddev, int count)
  {
    _xform = xfm;
    _avg = average;
    _std = stddev;
    _num = count;
  }
  MMPXform(const MMPXform &rhs)
  {
    *this = rhs;
  }
  MMPXform& operator=(const MMPXform &rhs)
  {
    if (this == &rhs)
      return *this;

    _xform = rhs._xform;
    _avg = rhs._avg;
    _std = rhs._std;
    _num = rhs._num;
    return *this;
  }

  // defined so we can use std::sort
  bool operator<(const MMPXform& cmp) const
  {
    float lAvg = ((_avg < 0.) ? -(_avg) : _avg);
    float rAvg = ((cmp._avg < 0.) ? -(cmp._avg) : cmp._avg);
    return (lAvg > rAvg); // note: largest->smallest
  }
  const OEMatchedPairTransform& Xform() const { return _xform; }
  OEMatchedPairTransform& GetXform()          { return _xform; }
  float Average() const                       { return _avg;   }
  bool SetAverage(float avg)                  { _avg = avg; return true; }
  float StdDev() const                        { return _std;   }
  int Num() const                             { return _num;   }

  OEMatchedPairTransform _xform;
  float _avg;
  float _std;
  int   _num;
};

// compute some simple statistics for ranking
static float Average(const std::vector<float> data)
{
  if (data.empty())
    return 0.0f;
  if (data.size() == 1)
    return data[0];

  float avg = 0.0f;
  for(std::vector<float>::const_iterator elt = data.begin();
      elt != data.end();
      ++elt)
    avg += *elt;
  return (avg / float(data.size()));
}
static float StdDev(const std::vector<float> data)
{
  if (data.size() <= 1)
    return 0.0f;

  float mean = Average(data);

  double sum = 0.0;
  for(std::vector<float>::const_iterator elt = data.begin();
      elt != data.end();
      ++elt)
  {
    double v = *elt - mean;
    sum += (v * v);
  }
  return (float)sqrt( sum / (double)data.size() );
}

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMatchedPairIndexOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  // input structures to index;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-input")))
    OEThrow.Fatal("Unable to open input file for reading: %s", itf.Get<std::string>("-input").c_str());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt)
  {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '1':
    chemctxt = OEMatchedPairContext::Bond1;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  case '3':
    chemctxt = OEMatchedPairContext::Bond3;
    break;
  case 'a':
  case 'A':
    chemctxt = OEMatchedPairContext::AllBonds;
    break;
  default:
    OEThrow.Fatal("Invalid context specified:%c, only 0|1|2|3|A allowed", ctxt);
    break;
  }

  bool bPrintTransforms = itf.Get<bool>("-printlist");
  // if a data field was specified, retreive the SD data field name
  std::string field;
  if (itf.Has<std::string>("-datafield"))
    field = itf.Get<std::string>("-datafield");

  // create options class with defaults;
  OEMatchedPairAnalyzerOptions opts;
  // apply requested command line options
  if (!OESetupMatchedPairIndexOptions(opts, itf))
    OEThrow.Fatal("Failure in OESetupMatchedPairIndexOptions!");

  if (!opts.HasIndexableFragmentHeavyAtomRange())
    OEThrow.Info("Indexing all fragments");
  else
    printf("Using index range=%.1f-%.1f%%\n",
           opts.GetIndexableFragmentRangeMin(), opts.GetIndexableFragmentRangeMax());

  if (field.empty() && !bPrintTransforms) {
    OEThrow.Info("Specify -datafield or -printlist, otherwise nothing to do!");
    return 1;
  }

  // create indexing engine;
  OEMatchedPairAnalyzer mmp(opts);

  // add molecules to be indexed;
  bool bFoundData = false;
  OEGraphMol mol;
  int record = 0;
  while (OEReadMolecule(ifs,mol))
  {
    int status = mmp.AddMol(mol,++record);
    if (status != record)
    {
      OEThrow.Info("Error adding input structure to index, record=%d status=%s",
                   record, OEMatchedPairIndexStatusName(status));
    }
    else if (!field.empty() && OEHasSDData(mol,field))
    {
      float fvalue;
      if (OEStringToNumber(OEGetSDData(mol,field),fvalue))
        bFoundData = true;
      else
        OEThrow.Fatal("%d: Non-numeric data for field %s found, %s",
                      record, field.c_str(), OEGetSDData(mol,field).c_str());
    }
  }

  if (mmp.NumMols() == 0)
    OEThrow.Fatal("No records in input structure file were indexed");

  if (!field.empty() && !bFoundData)
    OEThrow.Fatal("No data found for requested field, %s", field.c_str());

  if (mmp.NumMatchedPairs() == 0)
    OEThrow.Fatal("No matched pairs found from indexing, use -fragGe,-fragLe options to extend index range");

  // controls how transforms are extracted (direction and allowed properties)
  int extractMode = (OEMatchedPairTransformExtractMode::Sorted +
                     OEMatchedPairTransformExtractMode::NoSMARTS);

  // now walk the transforms from the indexed matched pairs
  std::vector<MMPXform> xforms ;

  int xfmidx = 0;
  for (OEIter<OEMatchedPairTransform> xformitor = OEMatchedPairGetTransforms(mmp,chemctxt,extractMode);
       xformitor;
       ++xformitor)
  {
    const OEMatchedPairTransform &mmpxform = *xformitor;
    ++xfmidx;

    if (bPrintTransforms)
      printf("%2d %s\n", xfmidx, mmpxform.GetTransform().c_str());

    int mmpidx = 0;
    std::vector<float> prop;
    for (OEIter<OEMatchedPair> mmpitor = mmpxform.GetMatchedPairs(); mmpitor; ++mmpitor)
    {
      const OEMatchedPair &mmppair = *mmpitor;

      ++mmpidx;
      char buff[100];
      sprintf(buff,"\t%2d: (%2d,%2d)",mmpidx,mmppair.FromIndex(),mmppair.ToIndex());
      std::string mmpinfo = buff;

      for (OEIter<const std::string> tagitor = mmppair.GetDataTags(); tagitor; ++tagitor)
      {
        const std::string &tag = *tagitor;
        mmpinfo = mmpinfo + std::string(" " + tag + "=(" +  mmppair.GetFromSDData(tag) + "," + mmppair.GetToSDData(tag) + ")");

        if (tag.compare(field) == 0) {
          float fromValue;
          if (!OEStringToNumber(mmppair.GetFromSDData(tag),fromValue))
            fromValue = 0.0;
          float toValue;
          if (!OEStringToNumber(mmppair.GetToSDData(tag),toValue))
            toValue = 0.0;
          prop.push_back(toValue - fromValue);
        }

      }
      if (bPrintTransforms)
        printf("%s\n",mmpinfo.c_str());
    }
    // skip if property not found
    if (!prop.empty()) {
      // add
      xforms.push_back(MMPXform(mmpxform, (float)Average(prop), (float)StdDev(prop), (int)prop.size()));
    }
  }

  if (field.empty())
    return 0; // done

  if (xforms.empty())
    OEThrow.Error("No matched pairs found with %s data",field.c_str());

  // sort the transforms by largest absolute delta property value
  std::sort(xforms.begin(), xforms.end());

  printf("\n*** Transforms sorted by delta %s\n", field.c_str());

  int idx = 0;
  for (std::vector<MMPXform>::iterator xfmitor = xforms.begin();
       xfmitor != xforms.end();
       ++xfmitor)
  {
    MMPXform &xfm = *xfmitor;

    ++idx;
    if ((extractMode & OEMatchedPairTransformExtractMode::NoSMARTS) != 0) {
      // not 'invertable' if SMARTS qualifiers were applied
      if (xfm.Average() < 0.0) {
        xfm.SetAverage(-1.0f * xfm.Average());
        xfm.GetXform().Invert();
      }
    }
    printf("%2d %s=(avg=%.2f,stdev=%.2f,num=%d) %s\n",idx,
           field.c_str(),
           xfm.Average(),
           xfm.StdDev(),
           xfm.Num(),
           xfm.Xform().GetTransform().c_str());
  }
  return 0;
}

Apply ChEMBL solubility transformations

//*****************************************************************************
//* Copyright (C) 2014 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to apply ChEMBL18 solubility transforms to an input set of structures
//* ---------------------------------------------------------------------------
//* ChEMBLsolubility [-i] input_mols [-o] output_mols [-verbose] [-context #] [-minpairs #]
//*
//* input_mols: filename of molecules to transform based on analysis
//* output_mols: filename to collect transformed molecules
//* [-verbose]: optional flag to request verbose progress
//* [-context #]: optional flag to request a specific chemistry context
//* [-minpairs #]: optional flag to request a minimum number of pairs to apply transforms
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "ChEMBLsolubility.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  bool verbose = itf.Get<bool>("-verbose");

  // input structure(s) to transform;
  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open file for reading: %s",itf.Get<std::string>("-i").c_str());

  // save output structure(s) to this file;
  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open file for writing: %s",itf.Get<std::string>("-o").c_str());

  // request a specific context for the transform activity, here 0-bonds;
  int chemctxt = OEMatchedPairContext::Bond0;
  std::string askcontext = itf.Get<std::string>("-context");
  char ctxt = askcontext.at(0);
  switch (ctxt) {
  case '0':
    chemctxt = OEMatchedPairContext::Bond0;
    break;
  case '2':
    chemctxt = OEMatchedPairContext::Bond2;
    break;
  default:
    OEThrow.Fatal("Invalid context specified: %s, only 0|2 allowed",askcontext.c_str());
    break;
  }

  unsigned int minpairs = itf.Get<unsigned int>("-minpairs");
  if (minpairs > 1u)
    OEThrow.Info("Requiring at least %d matched pairs to apply transformations", minpairs);

  int irec = 0;
  int ocnt = 0;
  int ototal = 0;
  OEGraphMol mol;
  while (OEReadMolecule(ifs,mol))
  {
    ++irec;
    OETheFunctionFormerlyKnownAsStripSalts(mol);

    ocnt = 0;
    for (OEIter<OEMolBase> outmol = OEApplyChEMBL18SolubilityTransforms(mol, chemctxt, minpairs); outmol; ++outmol)
    {
      ++ocnt;
      OEWriteMolecule(ofs, outmol);
    }
    if (ocnt == 0)
    {
      std::string name = mol.GetTitle();
      if (name.length() == 0)
        name = "Record " + OENumberToString(irec);
      printf("%s: did not produce any output", name.c_str());
      printf("%s\n", OEMolToSmiles(mol).c_str());
    }
    else
    {
      ototal += ocnt;
      if (verbose)
        printf("Record: %d transformation count=%d total mols=%d\n",
               irec, ocnt, ototal);
    }
  }

  if (irec == 0)
    OEThrow.Fatal("No records in input structure file to transform");

  if (ototal == 0)
    OEThrow.Warning("No transformed structures generated");
  else
    printf("Input molecules=%d Output molecules=%d", irec, ototal);

  return ((!ototal) ? 1 : 0);
}

MCS Similarity score reporting

//*****************************************************************************
//* Copyright (C) 2016 OpenEye Scientific Software, Inc.
//*****************************************************************************
//* Utility to index a set of structures and return the highest MCS similarity
//*  structures from an input query structure
//* ---------------------------------------------------------------------------
//* MCSFragDatabase [ -inmols index_mols | -indb index.mcsfrag ]
//*                 [ -query query_file ]
//*                 [ -type bond -limit 10 ]    // 10 Tanimoto bond similarity scores
//*                 [ -verbose 1 ]              // optional verbosity
//*                 [ -outdb outindex.mcsfrag ] // optional save index file
//*
//* index_mols: filename of input molecules to analyze
//* input.mcsfrag: filename of index file to load
//* query_file: filename of query to report MCS similarity results
//*****************************************************************************
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include <oemedchem.h>
#include "MCSFragDatabase.itf"

using namespace OESystem;
using namespace OEChem;
using namespace OEMedChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);
  OEConfigureMCSFragDatabaseOptions(itf);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  OEMCSFragDatabaseOptions mcsopt;
  if (!OESetupMCSFragDatabaseOptions(mcsopt, itf))
    OEThrow.Fatal("Error setting MCS fragment database options");

  // either:
  //   1) input structures to index
  //   2) MCS fragment database to load
  std::string indb;
  std::string outdb;
  std::string inmols;

  bool verbose = itf.Get<bool>("-verbose");

  if (!itf.Has<std::string>("-inmols") && !itf.Has<std::string>("-indb"))
    OEThrow.Fatal("Must specify either -inmols or -indb");
  else if (itf.Has<std::string>("-inmols") && itf.Has<std::string>("-indb"))
    OEThrow.Fatal("Must specify only one of -inmols or -indb");
  else if (itf.Has<std::string>("-inmols") && !itf.Has<std::string>("-outdb") && !itf.Has<std::string>("-query"))
    OEThrow.Fatal("Must specify one of -outdb or -query with -inmols");
  else if (itf.Has<std::string>("-indb") && !itf.Has<std::string>("-query"))
    OEThrow.Fatal("Must specify -query with -indb");

  if (itf.Has<std::string>("-outdb"))
  {
    outdb = itf.Get<std::string>("-outdb");
    if (!OEIsMCSFragDatabaseFiletype(outdb))
      OEThrow.Fatal("Option -outdb must specify a .mcsfrag filename: " + outdb);
  }

  // initialize the MCS fragment index
  OEMCSFragDatabase mcsdb(mcsopt);

  oemolistream ifsindex;
  if (itf.Has<std::string>("-indb"))
  {
    indb = itf.Get<std::string>("-indb");
    if (!OEIsMCSFragDatabaseFiletype(indb))
      OEThrow.Fatal("Option -indb must specify a .mcsfrag filename: " + indb);
    if (verbose)
      OEThrow.Info("Loading index from " + indb);
    if (!OEReadMCSFragDatabase(indb, mcsdb))
      OEThrow.Fatal("Error deserializing MCS fragment database: " + indb);
    if (itf.Has<std::string>("-outdb"))
    {
      OEThrow.Warning("Option -outdb only meaningful with -inmols, ignoring");
      outdb.clear();
    }
    if (!mcsdb.NumMols())
      OEThrow.Fatal("Loaded empty index");

    // index instantiated - drop through to results
  }
  else if (itf.Has<std::string>("-inmols"))
  {
    inmols = itf.Get<std::string>("-inmols");
    if (!ifsindex.open(inmols))
      OEThrow.Fatal("Unable to open input file for indexing: " + inmols);
  }

  std::string queryfile;
  oemolistream ifsquery;
  if (itf.Has<std::string>("-query"))
  {
    queryfile = itf.Get<std::string>("-query");
    if (!ifsquery.open(queryfile))
      OEThrow.Fatal("Unable to open query file: " + queryfile);
  }

  if (!inmols.empty())
  {
    if (!mcsopt.HasIndexableFragmentHeavyAtomRange())
      OEThrow.Info("Indexing all fragments");
    else
      printf("Using index range=%.1f-%.1f%%\n",
             mcsopt.GetIndexableFragmentRangeMin(), mcsopt.GetIndexableFragmentRangeMax());

    if ((mcsopt.GetOptions() & OEMatchedPairOptions::AllCuts) != OEMatchedPairOptions::AllCuts)
      printf("Using nondefault cut option=%0x\n", (mcsopt.GetOptions() & OEMatchedPairOptions::AllCuts));
    if ((mcsopt.GetOptions() & OEMatchedPairOptions::IndexHSites) == 0)
      printf("Using nondefault HMember off option\n");

    // add molecules to be indexed;
    unsigned int maxrec = 0;
    if (itf.Has<unsigned int>("-maxrec"))
      maxrec = itf.Get<unsigned int>("-maxrec");
    unsigned int vstatus = ( verbose ? 1000 : 0 );

    int record = 0;
    OEGraphMol mol;
    unsigned int indexed = 0;
    while (OEReadMolecule(ifsindex,mol))
    {
      ++record;
      int status = mcsdb.AddMol(mol,record);
      if (status == record)
          ++indexed;
      else if (verbose)
        OEThrow.Info("Input structure not added to index, record=%d status=%s",
                     record, OEMatchedPairIndexStatusName(status));
      if (maxrec && (unsigned)record >= maxrec)
        break;  // maximum record limit reached
      if (vstatus && (record % vstatus) == 0)
        printf("%d records processed, %ud structures indexed\n", record, indexed);
    }

    if (record == 0)
      OEThrow.Fatal("No records in input structure file for indexing");

    // indexing complete - drop through to results
  }

  if (!mcsdb.NumFragments())
    OEThrow.Fatal("No fragments found from indexing, use -fragGe,-fragLe options to extend indexable range");

  if (!outdb.empty())
  {
    if (verbose)
      OEThrow.Info("Writing index to " + outdb);
    if (!OEWriteMCSFragDatabase(outdb, mcsdb))
      OEThrow.Fatal("Error serializing MCS fragment database: " + outdb);
  }

  // return some status information about the index
  printf("indexed molecules=%d fragments=%d\n",
         mcsdb.NumMols(),mcsdb.NumFragments());

  // check for (optional) query
  if (queryfile.empty())
    return 0;

  // process the query options
  unsigned int qmaxrec = 0;
  if (itf.Has<unsigned int>("-qlim"))
    qmaxrec = itf.Get<unsigned int>("-qlim");

  unsigned int numscores = 10;
  if (itf.Has<unsigned int>("-limit"))
    numscores = itf.Get<unsigned int>("-limit");

  unsigned int cnttype = OEMCSScoreType::BondCount;
  if (itf.Has<std::string>("-type"))
  {
    if (itf.Get<std::string>("-type").find("atom") != std::string::npos)
      cnttype = OEMCSScoreType::AtomCount;
    else if (itf.Get<std::string>("-type").find("bond") != std::string::npos)
      cnttype = OEMCSScoreType::BondCount;
    else
      OEThrow.Warning("Ignoring unrecognized -type option, expecting atom or bond: " + itf.Get<std::string>("-type"));
  }

  // process the queries
  OEGraphMol qmol;
  unsigned int qnum = 0;
  while (OEReadMolecule(ifsquery, qmol))
  {
    ++qnum;

    unsigned int numhits = 0u;
    for (OEIter<OEMedChem::OEMCSMolSimScore> scores = mcsdb.GetSortedScores(qmol, numscores, 0, 0, true, cnttype);
         scores;
         ++scores, ++numhits)
    {
      OEMedChem::OEMCSMolSimScore &score = scores;

      if (!numhits)
        printf("Query: %d: %s\n", qnum, qmol.GetTitle());

      printf("\trecord: %6d\ttanimoto_%s_score: %.2f\tmcs_core: %s\n",
             score.GetIdx(), ((cnttype == OEMCSScoreType::BondCount) ? "bond" : "atom" ), score.GetScore(), score.GetMCSCore().c_str());
    }
    if (!numhits)
      OEThrow.Warning("Query: %d: %s - no hits\n", qnum, qmol.GetTitle());

    if (qmaxrec && qnum >= qmaxrec)
      break;
  }
  if (!qnum)
    OEThrow.Fatal("Error reading query structure(s): " + queryfile);

  return 0;
}