OEChem Examples

Convert molecule files

/****************************************************************************
 Copyright (C) 1998-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Program to convert from one molecule format to another
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

int main(int argc, char *argv[])
{
  if (argc != 3)
    OEThrow.Usage("%s <infile> <outfile>", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);

  oemolostream ofs;
  if (!ofs.open(argv[2])) 
    OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  OEMol mol;
  while (OEReadMolecule(ifs, mol))
    OEWriteMolecule(ofs, mol);

  return 0;
}

See also

Concatenating molecules

/****************************************************************************
 Copyright (C) 2004-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* This program concatenates molecules into one file.
* It can be useful for generating ROCS queries or reattaching ligands to a
* protein structure
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "catmols.itf"

using namespace OESystem;
using namespace OEChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  OEGraphMol omol;
  for (OEIter<const std::string> i = itf.GetList<std::string>("-i"); i; ++i)
  {
    std::string filename = *i;
    oemolistream ifs;
    if (ifs.open(filename))
    {
      OEGraphMol imol;
      while (OEReadMolecule(ifs, imol))
        OEAddMols(omol, imol);
    }
    else
      OEThrow.Fatal("Unable to open %s for reading", filename.c_str());
  }

  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open %s for writing", itf.Get<std::string>("-o").c_str());

  OEWriteMolecule(ofs, omol);

  return 0;
}

See also

Splitting multicomponent molecules

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Writes each component of a molecule as a separate molecule
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Writes each connected component of every input molecule as a separate
// molecule.
//
// Usage: <prog> <infile> [<outfile>]
// When no output file is given the output stream constructed with ".ism"
// is used as-is.  A summary of how many molecules were read and how many
// components were written is printed to std::cerr at the end.
int main(int argc, char *argv[])
{
  if (argc == 1 || argc > 3)
    OEThrow.Usage("%s <infile> [<outfile>]", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);

  oemolostream ofs(".ism");
  if (argc == 3)
    if (!ofs.open(argv[2]))
      OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  unsigned int incount = 0;
  unsigned int outcount = 0;
  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    incount++;

    // A molecule without atoms has no components to write.  It must be
    // skipped before the vector below is built: taking &parts[0] on an
    // empty std::vector is undefined behaviour.
    const unsigned int maxidx = mol.GetMaxAtomIdx();
    if (maxidx == 0)
      continue;

    // parts[] receives a component number per atom index.
    std::vector<unsigned int> parts(maxidx);
    unsigned int pcount = OEDetermineComponents(mol, &parts[0]);
    OEPartPred pred(&parts[0], maxidx);

    // Component numbers run from 1 to pcount inclusive.
    for (unsigned int i = 1; i <= pcount; ++i)
    {
      outcount++;
      pred.SelectPart(i);
      OEGraphMol partmol;
      OESubsetMol(partmol, mol, pred);
      OEWriteMolecule(ofs, partmol);
    }
  }

  std::cerr << "results:  in: " << incount << "  out: " << outcount << std::endl;

  return 0;
}

Extract molecules by title

/****************************************************************************
 Copyright (C) 2006-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Extract compound(s) from a file based on molecule title
****************************************************************************/
#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>
#include "molextract.itf"

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

// Copies from ifs to ofs only those molecules whose title is a member
// of nameset; all other molecules are read and discarded.
static void MolExtract(oemolistream& ifs, oemolostream& ofs,
                       std::set<std::string>& nameset)
{
  for (OEMol candidate; OEReadMolecule(ifs, candidate); )
  {
    const std::string title = candidate.GetTitle();
    const bool wanted = nameset.count(title) != 0;
    if (wanted)
    {
      OEWriteMolecule(ofs, candidate);
    }
  }
}

// Extracts molecules by title.  The titles come either from a file given
// with "-list" (one title per line, surrounding whitespace stripped) or
// from a single "-title" value; exactly one of the two must be supplied.
int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const bool hasList  = itf.Has<std::string>("-list");
  const bool hasTitle = itf.Has<std::string>("-title");
  // Both present or both absent is a usage error.
  if (hasList == hasTitle)
    OEThrow.Usage("Must give either -list or -title");

  const std::string inname = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(inname))
    OEThrow.Fatal("Unable to open %s for reading", inname.c_str());

  const std::string outname = itf.Get<std::string>("-o");
  oemolostream ofs;
  if (!ofs.open(outname))
    OEThrow.Fatal("Unable to open %s for writing", outname.c_str());

  // Gather the requested titles.
  std::set<std::string> nameset;
  if (hasList)
  {
    const std::string listname = itf.Get<std::string>("-list");
    oeifstream fs;
    if (!fs.open(listname))
      OEThrow.Fatal("Unable to open %s for reading", listname.c_str());

    std::string line;
    while (fs && fs.getline(line))
    {
      // std::set ignores duplicates, so every stripped line can be inserted.
      nameset.insert(OEStringStripWhiteSpace(line));
    }
  }
  else if (hasTitle)
  {
    nameset.insert(itf.Get<std::string>("-title"));
  }

  if (nameset.empty())
    OEThrow.Fatal("No titles requested");

  MolExtract(ifs, ofs, nameset);

  return 0;
}

See also

Write out unique molecules

/****************************************************************************
 Copyright (C) 2006-2013 OpenEye Scientific Software, Inc.
*****************************************************************************
* Read molecules and write out the unique ones. Two molecules are considered
* identical if their canonical isomeric smiles are identical.
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Writes a molecule to ofs only the first time its isomeric SMILES string
// (as produced by OECreateIsoSmiString) is encountered.
static void UniqMol(oemolistream& ifs, oemolostream& ofs)
{
  std::set<std::string> seen;
  std::string smiles;
  for (OEMol mol; OEReadMolecule(ifs, mol); )
  {
    OECreateIsoSmiString(smiles, mol);
    // insert() reports through .second whether the key was new.
    const bool firstTime = seen.insert(smiles).second;
    if (firstTime)
      OEWriteMolecule(ofs, mol);
  }
}

int main(int argc, char* argv[])
{
  if (argc != 3)
    OEThrow.Usage("%s <infile> <outfile>", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);

  oemolostream ofs;
  if (!ofs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  UniqMol(ifs, ofs);

  return 0;
}
/****************************************************************************
 Copyright (C) 2013 OpenEye Scientific Software, Inc.
*****************************************************************************
* Read molecules and write out the unique ones. Two molecules are considered
* identical if their InChIs are identical.
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Writes a molecule to ofs only the first time its InChI string (created
// by OECreateInChI with default-constructed options) is encountered.
static void UniqInChI(oemolistream& ifs, oemolostream& ofs)
{
  OEInChIOptions opts;
  std::set<std::string> seen;
  std::string key;
  for (OEMol mol; OEReadMolecule(ifs, mol); )
  {
    OECreateInChI(key, mol, opts);
    // insert() reports through .second whether the key was new.
    if (seen.insert(key).second)
    {
      OEWriteMolecule(ofs, mol);
    }
  }
}

int main(int argc, char* argv[])
{
  if (argc != 3)
    OEThrow.Usage("%s <infile> <outfile>", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);

  oemolostream ofs;
  if (!ofs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  UniqInChI(ifs, ofs);

  return 0;
}

See also

Randomize atoms of molecules

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Demonstrates how to randomly reorder atoms and bonds of a molecule
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

int main(int argc, char *argv[])
{
  if (argc != 3)
    OEThrow.Usage("%s <infile> <outfile>", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);

  oemolostream ofs;
  if (!ofs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    OEScrambleMolecule(mol);
    OEWriteMolecule(ofs, mol);
  }

  return 0;
}

See also

Generate canonical smiles

/****************************************************************************
 Copyright (C) 2002-2013 OpenEye Scientific Software, Inc.
*****************************************************************************
* Generate canonical smiles of various flavors
****************************************************************************/
#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>
#include "cansmi.itf"

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

/***************************************************************************
 To create unique Kekule smiles, must reperceive bond orders from
 scratch to avoid arbitrary nondeterministic variations, e.g.,
 CC1=C(O)C=CC=C1 vs. CC1=CC=CC=C1O
 This is why OESMILESFlag_Kekule is not sufficient and not used.
***************************************************************************/

// Builds a canonical SMILES string for mol.
//
//   isomeric - adds OESMILESFlag::ISOMERIC to the SMILES flags.
//   kekule   - rewrites the molecule into a Kekule form before the SMILES
//              is created (see the comment block above for why the bond
//              orders are reperceived rather than using a SMILES flag).
//
// Note that mol is modified: rings and aromaticity are reperceived and,
// in the kekule case, atoms and bonds are reordered.
static std::string CanSmi(OEGraphMol& mol, bool isomeric, bool kekule)
{
  OEFindRingAtomsAndBonds(mol);
  OEAssignAromaticFlags(mol, OEAroModel::OpenEye);

  if (kekule)
  {
    // Tag every aromatic bond with integer type 5 before the aromatic
    // flags are cleared and the molecule is kekulized.
    for (OEIter<OEBondBase> abond = mol.GetBonds(OEIsAromaticBond()); abond; ++abond)
    {
      abond->SetIntType(5);
    }
    OECanonicalOrderAtoms(mol);
    OECanonicalOrderBonds(mol);
    OEClearAromaticFlags(mol);
    OEKekulize(mol);
  }

  unsigned int flags = OESMILESFlag::Canonical;
  if (isomeric)
    flags = flags | OESMILESFlag::ISOMERIC;

  std::string result;
  OECreateSmiString(result, mol, flags);
  return result;
}

// Prints one canonical SMILES per input molecule, followed by a space and
// the molecule title when the title is non-empty.  Output goes to the
// "-o" file when given, otherwise to the stream initialised from oeout.
int main(int argc, char * argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const bool kekule = itf.Get<bool>("-kekule");
  const bool from3d = itf.Get<bool>("-from3d");
  // Requesting stereo from 3D coordinates forces isomeric output.
  const bool isomeric = from3d ? true : itf.Get<bool>("-isomeric");

  const std::string ifile = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(ifile))
    OEThrow.Fatal("Unable to open %s for reading", ifile.c_str());

  oeofstream ofs = oeout;
  if (itf.Has<std::string>("-o"))
  {
    const std::string ofile = itf.Get<std::string>("-o");
    if (!ofs.open(ofile))
      OEThrow.Fatal("Unable to open %s for writing", ofile.c_str());
  }

  for (OEGraphMol mol; OEReadMolecule(ifs, mol); )
  {
    if (from3d)
      OE3DToInternalStereo(mol);

    std::string line = CanSmi(mol, isomeric, kekule);
    const std::string title = mol.GetTitle();
    if (!title.empty())
    {
      line += " ";
      line += title;
    }

    ofs << line << oeendl;
  }

  return 0;
}

Filter molecules by weight or heavy atom count

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Filter out molecules by their molecular weight or heavy atom count
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "sizefilter.itf"

#include <limits>

using namespace OESystem;
using namespace OEChem;

// Forward declarations; the definitions follow main() in this file.

// True when the heavy-atom count of mol lies in [min, max].
bool IsMoleculeInHeavyAtomCountRange(unsigned int min, unsigned int max, OEMol& mol);
// True when the calculated molecular weight of mol lies in [min, max].
bool IsMoleculeInMolWtRange(double min, double max, OEMol& mol);
// Inclusive range tests: true when min <= val <= max.
bool IsBetween(unsigned int min, unsigned int max, unsigned int val);
bool IsBetween(double min, double max, double val);

// Filters molecules by heavy-atom count and molecular weight.
//
// Options read from the interface:
//   -i       input file (required by the code below)
//   -o       output file; when absent the stream constructed with ".ism"
//            is used as-is
//   -minhac  lower bound on heavy-atom count  (default: no lower bound)
//   -maxhac  upper bound on heavy-atom count  (default: no upper bound)
//   -minwt   lower bound on molecular weight  (default: no lower bound)
//   -maxwt   upper bound on molecular weight  (default: no upper bound)
//
// A molecule is written only when it passes both range checks; all
// bounds are inclusive.
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open %s for reading", itf.Get<std::string>("-i").c_str());

  oemolostream ofs(".ism");
  if (itf.Has<std::string>("-o"))
    if (!ofs.open(itf.Get<std::string>("-o")))
      OEThrow.Fatal("Unable to open %s for writing", itf.Get<std::string>("-o").c_str());

  unsigned int minhac = std::numeric_limits<unsigned int>::min();
  if (itf.Has<unsigned int>("-minhac"))
    minhac = itf.Get<unsigned int>("-minhac");

  unsigned int maxhac = std::numeric_limits<unsigned int>::max();
  if (itf.Has<unsigned int>("-maxhac"))
    maxhac = itf.Get<unsigned int>("-maxhac");

  // For floating-point types numeric_limits<>::min() is the smallest
  // *positive* normalized value, not the most negative one.  Using it as
  // the default lower bound would reject a molecule whose weight is
  // exactly 0 even though no -minwt was requested, so the most negative
  // finite double is used instead.
  double minwt = -std::numeric_limits<double>::max();
  if (itf.Has<double>("-minwt"))
    minwt = itf.Get<double>("-minwt");

  double maxwt = std::numeric_limits<double>::max();
  if (itf.Has<double>("-maxwt"))
    maxwt = itf.Get<double>("-maxwt");

  OEMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    if (!IsMoleculeInHeavyAtomCountRange(minhac, maxhac, mol))
      continue;

    if (!IsMoleculeInMolWtRange(minwt, maxwt, mol))
      continue;

    OEWriteMolecule(ofs, mol);
  }

  return 0;
}

bool IsMoleculeInHeavyAtomCountRange(unsigned int min, unsigned int max, OEMol& mol)
{
  unsigned int count = OECount(mol, OEIsHeavy());
  return IsBetween(min, max, count);
}

// Returns true when the weight computed by OECalculateMolecularWeight for
// mol lies within the inclusive range [min, max].
bool IsMoleculeInMolWtRange(double min, double max, OEMol& mol)
{
  const double weight = OECalculateMolecularWeight(mol);
  const bool inRange = IsBetween(min, max, weight);
  return inRange;
}

// Inclusive range test for unsigned integers: true when min <= val <= max.
bool IsBetween(unsigned int min, unsigned int max, unsigned int val)
{
  return min <= val && val <= max;
}

// Inclusive range test for doubles: true when min <= val <= max.
bool IsBetween(double min, double max, double val)
{
  return min <= val && val <= max;
}

See also

Strip salts

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Remove salts and/or remove all but the largest molecule
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

int main(int argc, char *argv[])
{
  if (argc != 3)
    OEThrow.Usage("%s <infile> <outfile>", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);

  oemolostream ofs;
  if (!ofs.open(argv[2])) 
    OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  OEMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    OETheFunctionFormerlyKnownAsStripSalts(mol);
    OEWriteMolecule(ofs, mol);
  }
  return 0;
}

Extract rings

/****************************************************************************
 Copyright (C) 2004-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Extracting rings/ring systems from input molecules
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "ringsubset.itf"

using namespace OESystem;
using namespace OEChem;


// For every input molecule, builds the subset made of its ring atoms and
// writes it (titled "<original title>_rings") when the subset is not empty.
// When exo is true, atoms matching OEIsNonRingAtomDoubleBondedToRing are
// included in the subset as well.
static void RingSubSet(oemolistream& ifs, oemolostream& ofs, bool exo)
{
  const bool adjustHcount = true;

  for (OEGraphMol mol; OEReadMolecule(ifs, mol); )
  {
    OEGraphMol rings;
    if (!exo)
    {
      OESubsetMol(rings, mol, OEAtomIsInRing(), adjustHcount);
    }
    else
    {
      OEAtomIsInRing ringAtom;
      OEIsNonRingAtomDoubleBondedToRing exoAtom;
      OEOr<OEAtomBase> ringOrExo(ringAtom, exoAtom);
      OESubsetMol(rings, mol, ringOrExo, adjustHcount);
    }

    std::string newTitle(mol.GetTitle());
    newTitle += "_rings";
    rings.SetTitle(newTitle);

    // Molecules without any ring atoms produce an empty subset; skip those.
    if (rings.NumAtoms() != 0)
      OEWriteMolecule(ofs, rings);
  }
}

// Reads the "-exo", "-i" and optional "-o" settings, opens the streams and
// runs RingSubSet.  Without "-o" the output stream constructed with ".ism"
// is used as-is.
int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const bool includeExo = itf.Get<bool>("-exo");

  const std::string inname = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(inname))
    OEThrow.Fatal("Unable to open %s for reading", inname.c_str());

  oemolostream ofs(".ism");
  if (itf.Has<std::string>("-o"))
  {
    const std::string outname = itf.Get<std::string>("-o");
    if (!ofs.open(outname))
      OEThrow.Fatal("Unable to open %s for writing", outname.c_str());
  }

  RingSubSet(ifs, ofs, includeExo);

  return 0;
}

Extract molecule scaffolds

/****************************************************************************
 Copyright (C) 2009-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Extract the ring scaffold of a molecule
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "extractscaffold.itf"

using namespace OESystem;
using namespace OEChem;

bool TraverseForRing(bool *visited, const OEAtomBase *atom)
{
  visited[atom->GetIdx()] = true;

  for (OEIter<OEAtomBase> nbor = atom->GetAtoms(); nbor; ++nbor)
  {
    if (!visited[nbor->GetIdx()])
    {
      if (nbor->IsInRing())
        return true;

      if (TraverseForRing(visited, nbor))
        return true;
    }
  }
  return false;
}

// Starts a ring search at nbor that is not allowed to pass back through
// root: root is pre-marked as visited before TraverseForRing is called.
// The visited array is sized by the parent molecule's GetMaxAtomIdx().
bool DepthFirstSearchForRing(const OEAtomBase *root, const OEAtomBase *nbor)
{
  const unsigned int natoms = root->GetParent()->GetMaxAtomIdx();
  const size_t nbytes = sizeof(bool) * natoms;

  bool *seen = (bool *)OEMalloca(nbytes);
  memset(seen, 0, nbytes);
  seen[root->GetIdx()] = true;

  const bool found = TraverseForRing(seen, nbor);

  OEFreea(seen);
  return found;
}

// Atom predicate selecting scaffold atoms: an atom qualifies when it is in
// a ring, or when more than one of its neighbours leads to a ring atom
// according to DepthFirstSearchForRing (i.e. it sits on a linker between
// ring atoms).
class IsInScaffold : public OEUnaryPredicate<OEAtomBase>
{
  public:
    bool operator() (const OEAtomBase &atom) const
    {
      if (atom.IsInRing())
        return true;

      // Count the neighbours from which a ring can be reached without
      // going back through this atom.
      unsigned int ringBranches = 0;
      for (OEIter<OEAtomBase> next = atom.GetAtoms(); next; ++next)
      {
        if (DepthFirstSearchForRing(&atom, next))
          ringBranches += 1;
      }

      return ringBranches >= 2;
    }

    // Returns a heap-allocated copy of this predicate.
    OEUnaryFunction<OEAtomBase, bool> *CreateCopy() const
    {
      return new IsInScaffold(*this);
    }
};

// Writes the scaffold of each input molecule: the subset of atoms selected
// by IsInScaffold, optionally extended (with "-exo") by atoms matching
// OEIsNonRingAtomDoubleBondedToRing.  A subset that evaluates to false is
// not written.
int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const bool includeExo = itf.Get<bool>("-exo");

  const std::string inname = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(inname))
    OEThrow.Fatal("Unable to open %s for reading", inname.c_str());

  const std::string outname = itf.Get<std::string>("-o");
  oemolostream ofs;
  if (!ofs.open(outname))
    OEThrow.Fatal("Unable to open %s for writing", outname.c_str());

  const bool adjustHcount = true;
  OEMol src;
  OEMol dst;
  while (OEReadMolecule(ifs, src))
  {
    if (!includeExo)
    {
      OESubsetMol(dst, src, IsInScaffold(), adjustHcount);
    }
    else
    {
      OESubsetMol(dst, src,
                  OEOr<OEAtomBase>(IsInScaffold(), OEIsNonRingAtomDoubleBondedToRing()),
                  adjustHcount);
    }

    if (dst)
      OEWriteMolecule(ofs, dst);
  }

  return 0;
}

Extract random molecule subset

/****************************************************************************
 Copyright (C) 2011, 2012 OpenEye Scientific Software, Inc.
*****************************************************************************
* Randomly reorder molecules and optionally obtain a random subset
****************************************************************************/
#include "openeye.h"
#include "oesystem.h"
#include "oechem.h"
#include "randomsample.itf"

#include <algorithm>

using namespace OESystem;
using namespace OEChem;

// Appends compressed copies of the molecules read from ifs to mols and
// stops once count molecules have been read.  Because the running total is
// at least 1 when it is compared, a count of 0 never matches and the whole
// stream is loaded.
static void LoadDatabase(oemolistream& ifs,
                         std::vector<OEMol> &mols,
                         unsigned int count)
{
  OEMol mol(OEMCMolType::OEDBMCMol);
  for (unsigned int loaded = 1; OEReadMolecule(ifs, mol); ++loaded)
  {
    mol.Compress();
    mols.push_back(mol);
    if (loaded == count)
      break;
  }
}

// Writes uncompressed copies of the molecules in mols to ofs, in order,
// stopping after count molecules.  Because the running total is at least 1
// when it is compared, a count of 0 never matches and every molecule is
// written.  The stored molecules themselves stay compressed.
static void WriteDatabase(oemolostream& ofs,
                          std::vector<OEMol> &mols,
                          unsigned int count)
{
  unsigned int written = 0;
  for (std::vector<OEMol>::iterator it = mols.begin(); it != mols.end(); ++it)
  {
    // Work on a copy so the vector element is left untouched.
    OEMol expanded = *it;
    expanded.UnCompress();
    OEWriteMolecule(ofs, expanded);

    ++written;
    if (written == count)
      break;
  }
}

// Loads every molecule from ifs, shuffles them and writes the first
// percent% of the shuffled list (truncated towards zero, but never fewer
// than one requested) to ofs.
static void RandomizePercent(oemolistream& ifs,
                             oemolostream& ofs,
                             float percent)
{
  std::vector<OEMol> mols;
  LoadDatabase(ifs, mols, 0);  // 0 => load the whole stream

  random_shuffle(mols.begin(), mols.end());

  const std::vector<OEMol>::size_type total = mols.size();
  unsigned int wanted = (unsigned int)(percent * 0.01 * (double)total);
  if (wanted == 0)
    wanted = 1;

  WriteDatabase(ofs, mols, wanted);
}

// Shuffles the whole input: equivalent to RandomizePercent at 100 percent.
static void Randomize(oemolistream& ifs,
                      oemolostream& ofs)
{
  const float everything = 100;
  RandomizePercent(ifs, ofs, everything);
}

// Returns rand() scaled into the half-open interval [0, 1): the divisor is
// RAND_MAX + 1, so the result can never reach 1.0.
static double RandBetweenZeroAndOne()
{
  const double divisor = static_cast<double>(RAND_MAX) + 1.0;
  const double sample = static_cast<double>(rand());
  return sample / divisor;
}

// Selects a random sample of count molecules from ifs without holding the
// whole stream in memory: the first count molecules fill the sample, and
// each later molecule replaces a randomly chosen entry with probability
// count / (number of molecules read so far, including this one).  The
// sample is shuffled before being written to ofs.
static void RandomizeN(oemolistream& ifs,
                       oemolostream& ofs,
                       unsigned int count)
{
  std::vector<OEMol> sample;
  sample.reserve(count);
  LoadDatabase(ifs, sample, count);

  OEMol mol(OEMCMolType::OEDBMCMol);
  for (unsigned int extra = 0; OEReadMolecule(ifs, mol); ++extra)
  {
    // Position (1-based) of this molecule in the overall stream.
    const double position = (double)(count + extra + 1);
    const double keepProbability = (double)count / position;
    if (RandBetweenZeroAndOne() < keepProbability)
    {
      const size_t slot = (size_t)(((double)count) * RandBetweenZeroAndOne());
      mol.Compress();
      sample[slot] = mol;
    }
  }

  random_shuffle(sample.begin(), sample.end());
  WriteDatabase(ofs, sample, count);
}

// Randomly reorders the input molecules and writes all of them, a
// percentage of them ("-p") or a fixed number of them ("-n").  The random
// generator is seeded from "-seed" when given, otherwise from time().
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  // NOTE(review): "-n" is queried as int here but as unsigned int in the
  // dispatch below; confirm which type the interface definition declares.
  if (itf.Has<float>("-p") && itf.Has<int>("-n"))
    OEThrow.Usage("Give only one option, -p or -n");

  const std::string inname = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(inname))
    OEThrow.Fatal("Unable to open %s for reading", inname.c_str());

  oemolostream ofs(".ism");
  if (itf.Has<std::string>("-o"))
  {
    const std::string outname = itf.Get<std::string>("-o");
    if (!ofs.open(outname))
      OEThrow.Fatal("Unable to open %s for writing", outname.c_str());
  }

  if (!itf.Has<unsigned int>("-seed"))
    srand( (unsigned)time((time_t*)0));
  else
    srand(itf.Get<unsigned int>("-seed"));

  if (itf.Has<unsigned int>("-n"))
  {
    RandomizeN(ifs, ofs, itf.Get<unsigned int>("-n"));
  }
  else if (itf.Has<float>("-p"))
  {
    RandomizePercent(ifs, ofs, itf.Get<float>("-p"));
  }
  else
  {
    Randomize(ifs, ofs);
  }

  return 0;
}

See also

Performing a reaction

/****************************************************************************
 Copyright (C) 2002-2017 OpenEye Scientific Software, Inc.
*****************************************************************************
* Perform reactions on the given compounds
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Applies the reaction umr to every molecule read from ifs and writes the
// molecule to ofs only when umr(mol) returns true.
static void UniMolRxn(oemolistream& ifs, 
                      oemolostream& ofs, 
                      const OEUniMolecularRxn& umr)
{
  for (OEGraphMol mol; OEReadMolecule(ifs, mol); )
  {
    const bool reacted = umr(mol);
    if (reacted)
      OEWriteMolecule(ofs, mol);
  }
}

int main(int argc, char *argv[])
{
  if (argc < 3 || argc > 4)
    OEThrow.Usage("%s SMIRKS <infile> [<outfile>]", argv[0]);

  OEQMol qmol;
  if (!OEParseSmirks(qmol, argv[1]))
    OEThrow.Fatal("Unable to parse SMIRKS: %s", argv[1]);

  OEUniMolecularRxn umr;
  if (!umr.Init(qmol))
    OEThrow.Fatal("Failed to initialize reaction with %s SMIRKS", argv[1]);
  umr.SetClearCoordinates(true);

  oemolistream ifs;
  if (!ifs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for reading", argv[2]);

  oemolostream ofs(".ism");
  if (argc == 4)
    if (!ofs.open(argv[3]))
      OEThrow.Fatal("Unable to open %s for writing", argv[3]);

  UniMolRxn(ifs, ofs, umr);

  return 0;
}

See also

Library generation

/****************************************************************************
 Copyright (C) 2004-2017 OpenEye Scientific Software, Inc.
*****************************************************************************
* Perform library generation with SMIRKS
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "libgen.itf" 

using namespace OESystem;
using namespace OEChem;
using namespace std;

// Writes the products generated by libgen to ofs.  When unique is true a
// product is skipped if its SMILES (isomeric when requested) has already
// been written; when unique is false every product is written.
static void  LibGen(OELibraryGen& libgen, oemolostream& ofs, bool isomeric, bool unique)
{
  unsigned int smiflag = OESMILESFlag::DEFAULT; // Canonical|AtomMaps|RGroups
  if (isomeric)
    smiflag = smiflag | OESMILESFlag::ISOMERIC;

  std::set<std::string> seen;
  for (OEIter<OEMolBase> product = libgen.GetProducts(); product; ++product)
  {
    std::string key;
    OECreateSmiString(key, product, smiflag);

    if (unique && seen.count(key) != 0)
      continue;

    seen.insert(key);
    OEWriteMolecule(ofs, product);
  }
}

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  if (!(itf.Has<string>("-smirks") ^ itf.Has<string>("-rxn")))
    OEThrow.Fatal("Please provide SMIRKS string or MDL reaction file");

  OEQMol reaction;
  if (itf.Has<string>("-smirks"))
  {
    string smirks   = itf.Get<string>("-smirks");
    if (!OEParseSmirks(reaction, smirks.c_str()))
      OEThrow.Fatal("Unable to parse SMIRKS: %s", smirks.c_str());
  }
  else
  {
    string rxn = itf.Get<string>("-rxn");
    oemolistream rfile(rxn);
    unsigned int opt = OEMDLQueryOpts::ReactionQuery|OEMDLQueryOpts::SuppressExplicitH;
    if (!OEReadMDLReactionQueryFile(rfile, reaction, opt))      
      OEThrow.Fatal("Unable to read reaction file: %s", rxn.c_str());
  }

  bool relax      = itf.Get<bool>("-relax");
  bool unique     = itf.Get<bool>("-unique");
  bool implicitH  = itf.Get<bool>("-implicitH");
  bool valcorrect = itf.Get<bool>("-valence");
  bool isomeric   = itf.Get<bool>("-isomeric");

  OELibraryGen libgen;
  // Initialize library generation
  if (!libgen.Init(reaction, !relax))
    OEThrow.Fatal("failed to initialize library generator");
  libgen.SetValenceCorrection(valcorrect);
  libgen.SetExplicitHydrogens(!implicitH);
  libgen.SetClearCoordinates(true);

  unsigned int nrReacts = 0;
  for (nrReacts = 0; itf.Has<string>("-reactants", nrReacts); ++nrReacts)
  {
    string fileName = itf.Get<string>("-reactants", nrReacts);
    if (nrReacts >= libgen.NumReactants())
      OEThrow.Fatal("Number of reactant files exceeds number of reactants specified in reaction");
    oemolistream ifs;
    if (!ifs.open(fileName))
      OEThrow.Fatal("Unable to read %s reactant file", fileName.c_str());
    libgen.SetStartingMaterial(ifs.GetMolBases(), nrReacts, unique);
  }
  if (nrReacts != libgen.NumReactants())
    OEThrow.Fatal("Reactions requires %d reactant files!", libgen.NumReactants());

  oemolostream ofs(".ism");
  if (itf.Has<string>("-product"))
  {
    string fileName = itf.Get<string>("-product");
    if (!ofs.open(fileName))
      OEThrow.Fatal("Unable to open %s for writing", fileName.c_str());
  }

  LibGen(libgen, ofs, isomeric, unique);

  return 0;
}

See also

Perform substructure searches

/****************************************************************************
 Copyright (C) 2002-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Perform substructure search on molecule file
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "molgrep.itf"

using namespace OESystem;
using namespace OEChem;

// Runs the substructure search ss over every molecule in ifs.  A molecule
// is selected when it matches (or, with "-r", when it does not match).
// With "-c" the selected molecules are only counted and the total is
// printed to std::cout; otherwise each selected molecule is written to ofs.
static void SubSearch(OEInterface& itf, OESubSearch& ss,
                      oemolistream& ifs, oemolostream& ofs)
{
  const bool invert    = itf.Get<bool>("-r");
  const bool countOnly = itf.Get<bool>("-c");

  unsigned int selected = 0;
  for (OEGraphMol mol; OEReadMolecule(ifs, mol); )
  {
    OEPrepareSearch(mol, ss);
    if (ss.SingleMatch(mol) == invert)
      continue;

    if (countOnly)
      ++selected;
    else
      OEWriteMolecule(ofs, mol);
  }

  if (countOnly)
    std::cout << selected << " matching molecules"  <<  std::endl;
}

// Substructure "grep": searches the "-i" file for the SMARTS pattern given
// with "-p".  Exactly one of counting ("-c") and output ("-o") must be
// selected; the output file is opened only when not counting.
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const bool countOnly = itf.Get<bool>("-c");
  const bool hasOutput = itf.Has<std::string>("-o");
  // Both set or both unset is rejected.
  if (countOnly == hasOutput)
    OEThrow.Fatal("Counting (-c) or output (-o) must be specified and are mutually exclusive.");

  const std::string inname = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(inname))
    OEThrow.Fatal("Unable to open %s for reading", inname.c_str());

  oemolostream ofs;
  if (!countOnly)
  {
    const std::string outname = itf.Get<std::string>("-o");
    if (!ofs.open(outname))
      OEThrow.Fatal("Unable to open %s for writing", outname.c_str());
  }

  const std::string smarts = itf.Get<std::string>("-p");
  OESubSearch ss;
  if (!ss.Init(smarts.c_str()))
    OEThrow.Fatal("Unable to parse SMARTS: %s", smarts.c_str());

  SubSearch(itf, ss, ifs, ofs);

  return 0;
}

See also

Align molecules by maximum common substructure

/****************************************************************************
 Copyright (C) 2006-2012 OpenEye Scientific Software, Inc.
*****************************************************************************
* Align two compounds based on the maximum common substructure
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
using namespace OESystem;
using namespace OEChem;

// Overlays fitmol onto refmol using maximum-common-substructure matches.
// Atoms are compared by atomic number and aromaticity; bond order is
// ignored.  For every unique match, fitmol is rotated and translated by
// the transform that OERMSD computes and then written to ofs.  Matches for
// which OERMSD returns a negative value are reported with a warning and
// skipped.
static void MCSAlign(OEMolBase& refmol, OEMolBase& fitmol,
                     oemolostream& ofs)
{
  const unsigned int atomexpr = OEExprOpts::AtomicNumber|OEExprOpts::Aromaticity;
  const unsigned int bondexpr = 0; // ignore bond order

  OEMCSSearch mcss(OEMCSType::Exhaustive);
  mcss.Init(refmol, atomexpr, bondexpr);
  mcss.SetMCSFunc(OEMCSMaxBondsCompleteCycles());

  const bool uniqueOnly = true;
  const bool overlay = true;
  for (OEIter<OEMatchBase> match = mcss.Match(fitmol, uniqueOnly); match; ++match)
  {
    double rotation[9];
    double translation[3];
    const double rms =
        OERMSD(mcss.GetPattern(), fitmol, *match, overlay, rotation, translation);
    if (rms < 0.0)
    {
      OEThrow.Warning("RMS overlay failure");
      continue;
    }

    OERotate(fitmol, rotation);
    OETranslate(fitmol, translation);
    OEWriteMolecule(ofs, fitmol);
  }
}

// Returns false for the SMI, ISM, CAN and MF format codes and true for
// every other format code.
static bool Is3DFormat(unsigned int fmt)
{
  return fmt != OEFormat::SMI && fmt != OEFormat::ISM &&
         fmt != OEFormat::CAN && fmt != OEFormat::MF;
}

int main(int argc, char* argv[])
{
  if (argc != 4)
    OEThrow.Usage("%s <refmol> <fitmol> <outfile>", argv[0]);

  oemolistream reffs;
  if (!reffs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);
  if (!Is3DFormat(reffs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 3D coordinates");
  OEGraphMol refmol;
  if (!OEReadMolecule(reffs, refmol))
    OEThrow.Fatal("Unable to read molecule in %s", argv[1]);
  if (refmol.GetDimension() != 3)
    OEThrow.Fatal("%s doesn't have 3D coordinates", refmol.GetTitle());

  oemolistream fitfs;
  if (!fitfs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for reading", argv[2]);
  if (!Is3DFormat(fitfs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 3D coordinates");

  oemolostream ofs;
  if (!ofs.open(argv[3]))
    OEThrow.Fatal("Unable to open %s for writing", argv[3]);
  if (!Is3DFormat(ofs.GetFormat()))
    OEThrow.Fatal("Invalid output format: need 3D coordinates");

  OEWriteConstMolecule(ofs, refmol);
  OESuppressHydrogens(refmol);

  OEGraphMol fitmol;
  while (OEReadMolecule(fitfs, fitmol))
  {
    if (fitmol.GetDimension() != 3)
    {
      OEThrow.Warning("%s doesn't have 3D coordinates", fitmol.GetTitle());
      continue;
    }
    MCSAlign(refmol, fitmol, ofs);
  }

  return 0;
}

See also

Align molecules by clique match

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Align two compounds based on the clique match
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Overlays fitmol onto refmol once for every clique match and writes each
// resulting pose of fitmol to ofs.
//
// refmol  pattern molecule for the clique search (default atom and bond
//         expression options)
// fitmol  molecule that is matched, rotated and translated in place
// ofs     stream that receives one copy of fitmol per successful overlay
//
// A negative OERMSD result is treated as an overlay failure, as the MCS
// alignment example does: a warning is issued and the match is skipped, so
// the rotation and translation buffers are never applied uninitialized.
static void CliqueAlign(OEMolBase& refmol, OEMolBase& fitmol,
                        oemolostream& ofs)
{
  OECliqueSearch cs(refmol, OEExprOpts::DefaultAtoms, OEExprOpts::DefaultBonds);
  // NOTE(review): presumably "keep cliques within 5 of the largest" and
  // "at least 6 atoms" -- confirm against the OECliqueSearch documentation.
  cs.SetSaveRange(5);
  cs.SetMinAtoms(6);

  const bool overlay = true;
  for (OEIter<OEMatchBase> mi = cs.Match(fitmol); mi; ++mi)
  {
    double rmat[9];
    double trans[3];
    const double rms = OERMSD(cs.GetPattern(), fitmol, *mi, overlay, rmat, trans);
    if (rms < 0.0)
    {
      OEThrow.Warning("RMS overlay failure");
      continue;
    }
    OERotate(fitmol, rmat);
    OETranslate(fitmol, trans);
    OEWriteMolecule(ofs, fitmol);
  }
}

// Returns true unless fmt is one of the formats listed in the table below
// (SMI, ISM, CAN, MF).
static bool Is3DFormat(unsigned int fmt)
{
  const unsigned int rejected[] = { OEFormat::SMI, OEFormat::ISM,
                                    OEFormat::CAN, OEFormat::MF };
  const unsigned int nrejected = sizeof(rejected) / sizeof(rejected[0]);
  for (unsigned int k = 0; k < nrejected; ++k)
  {
    if (rejected[k] == fmt)
      return false;
  }
  return true;
}

int main(int argc, char* argv[])
{
  if (argc != 4)
    OEThrow.Usage("%s <refmol> <fitmol> <outfile>", argv[0]);

  oemolistream reffs;
  if (!reffs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);
  if (!Is3DFormat(reffs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 3D coordinates");
  OEGraphMol refmol;
  if (!OEReadMolecule(reffs, refmol))
    OEThrow.Fatal("Unable to read molecule in %s", argv[1]);
  if (refmol.GetDimension() != 3)
    OEThrow.Fatal("%s doesn't have 3D coordinates", refmol.GetTitle());

  oemolistream fitfs;
  if (!fitfs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for reading", argv[2]);
  if (!Is3DFormat(fitfs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 3D coordinates");

  oemolostream ofs;
  if (!ofs.open(argv[3]))
    OEThrow.Fatal("Unable to open %s for writing", argv[3]);
  if (!Is3DFormat(ofs.GetFormat()))
    OEThrow.Fatal("Invalid output format: need 3D coordinates");

  OEWriteConstMolecule(ofs, refmol);
  OESuppressHydrogens(refmol);

  OEGraphMol fitmol;
  while (OEReadMolecule(fitfs, fitmol))
  {
    if (fitmol.GetDimension() != 3)
    {
      OEThrow.Warning("%s doesn't have 3D coordinates", fitmol.GetTitle());
      continue;
    }
    CliqueAlign(refmol, fitmol, ofs);
  }
  return 0;
}

See also

Align molecules by SMARTS

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Align two compounds based on smarts match
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Overlays fitmol onto refmol for every combination of a unique SMARTS match
// in refmol and a unique SMARTS match in fitmol, writing fitmol to ofs after
// each successful overlay.
//
// refmol  reference molecule (not modified here)
// fitmol  molecule that is rotated and translated in place
// ss      substructure search holding the SMARTS pattern
// ofs     stream that receives one copy of fitmol per successful overlay
//
// A negative OERMSD result is treated as an overlay failure, as the MCS
// alignment example does: a warning is issued and the combination is
// skipped, so the rot/trans buffers are never applied uninitialized.
static void SmartsAlign(OEMolBase& refmol, OEMolBase& fitmol,
                        OESubSearch& ss, oemolostream& ofs)
{
  const bool unique = true;
  const bool overlay = true;
  for (OEIter<OEMatchBase> match1 = ss.Match(refmol, unique); match1; ++match1)
  {
    for (OEIter<OEMatchBase> match2 = ss.Match(fitmol, unique); match2; ++match2)
    {
      // Pair up the target atoms of the two matches position by position to
      // build a refmol -> fitmol atom correspondence.
      OEIter<OEMatchPair<OEAtomBase> > apr1 = match1->GetAtoms();
      OEIter<OEMatchPair<OEAtomBase> > apr2 = match2->GetAtoms();
      OEMatch match;
      // Both iterators are tested so a shorter second match can never be
      // dereferenced past its end.
      for ( ; apr1 && apr2; ++apr1, ++apr2)
        match.AddPair(apr1->target, apr2->target);

      double rot[9];
      double trans[3];
      const double rms = OERMSD(refmol, fitmol, match, overlay, rot, trans);
      if (rms < 0.0)
      {
        OEThrow.Warning("RMS overlay failure");
        continue;
      }
      OERotate(fitmol, rot);
      OETranslate(fitmol, trans);
      OEWriteConstMolecule(ofs, fitmol);
    }
  }
}

int main(int argc, char* argv[])
{
  if (argc != 5)
    OEThrow.Usage("%s <refmol> <fitmol> <outfile> <smarts>", argv[0]);

  oemolistream reffs;
  if (!reffs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);
  if (!OEIs3DFormat(reffs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 3D coordinates");
  OEGraphMol refmol;
  if (!OEReadMolecule(reffs, refmol))
    OEThrow.Fatal("Unable to read molecule in %s", argv[1]);
  if (refmol.GetDimension() != 3)
    OEThrow.Fatal("%s doesn't have 3D coordinates", refmol.GetTitle());

  oemolistream fitfs;
  if (!fitfs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for reading", argv[2]);
  if (!OEIs3DFormat(fitfs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 3D coordinates");

  oemolostream ofs;
  if (!ofs.open(argv[3]))
    OEThrow.Fatal("Unable to open %s for writing", argv[3]);
  if (!OEIs3DFormat(ofs.GetFormat()))
    OEThrow.Fatal("Invalid output format: need 3D coordinates");

  OEWriteConstMolecule(ofs, refmol);

  OESubSearch ss;
  if (!ss.Init(argv[4]))
    OEThrow.Fatal("Unable to parse SMARTS: %s", argv[4]);


  OEPrepareSearch(refmol, ss);
  if (!ss.SingleMatch(refmol))
    OEThrow.Fatal("SMARTS fails to match refmol");

  OEGraphMol fitmol;
  while (OEReadMolecule(fitfs, fitmol))
  {
    if (fitmol.GetDimension() != 3)
    {
      OEThrow.Warning("%s doesn't have 3D coordinates", fitmol.GetTitle());
      continue;
    }
    OEPrepareSearch(fitmol, ss);
    if (!ss.SingleMatch(fitmol))
    {
      OEThrow.Warning("SMARTS fails to match fitmol %s", fitmol.GetTitle());
      continue;
    }

    SmartsAlign(refmol, fitmol, ss, ofs);
  }

  return 0;
}

See also

Align multi-conformer molecules

/****************************************************************************
 * Copyright 2004-2015 OpenEye Scientific Software, Inc.
 *****************************************************************************
 * Performing RMSD calculation between a 3D reference molecule and
 * multi-conformer molecules
 ****************************************************************************/

#include <vector>
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

#include "rmsd.itf"

using namespace std;
using namespace OESystem;
using namespace OEChem;

// Entry point: computes the RMSD between a reference molecule (-ref) and
// every conformer of each molecule in -in, reports the values through
// OEThrow.Info and, when -out is given, writes the molecules to that file
// (rotated and translated onto the reference when -overlay is set).
int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  if (!itf.Get<bool>("-verbose"))
    OEThrow.SetLevel(OEErrorLevel::Warning);

  const string rfname = itf.Get<string>("-ref");
  const string ifname = itf.Get<string>("-in");

  const bool automorph = itf.Get<bool>("-automorph");
  const bool heavy = itf.Get<bool>("-heavyonly");
  const bool overlay = itf.Get<bool>("-overlay");

  oemolistream refifs;
  if (!refifs.open(rfname))
    OEThrow.Fatal("Unable to open %s for reading", rfname.c_str());

  OEGraphMol rmol;
  if (!OEReadMolecule(refifs, rmol))
    OEThrow.Fatal("Unable to read reference molecule");

  oemolistream ifs;
  if (!ifs.open(ifname))
    OEThrow.Fatal("Unable to open %s for reading", ifname.c_str());

  oemolostream ofs;
  // Looked up once; the answer cannot change while molecules are processed.
  const bool writeOutput = itf.Has<string>("-out");
  if (writeOutput)
  {
    const string ofname = itf.Get<string>("-out");
    if (!ofs.open(ofname))
      OEThrow.Fatal("Unable to open %s for writing", ofname.c_str());
    if (!overlay)
      OEThrow.Warning("Output is the same as input when overlay is false");
  }

  OEMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    // The title is data, not a format string: a '%' in it must not be
    // interpreted by the printf-style message formatter.
    OEThrow.Info("%s", mol.GetTitle());

    // One RMSD, one 3x3 rotation and one translation slot per conformer
    // index; the buffers are indexed by OEConfBase::GetIdx() below.
    const unsigned int maxIdx = mol.GetMaxConfIdx();
    vector<double> rmsds(maxIdx, 0.0);
    vector<double> rmtx(9 * maxIdx, 0.0);
    vector<double> tmtx(3 * maxIdx, 0.0);

    // performing RMSD for all conformers
    OERMSD(rmol, mol, &rmsds[0], automorph, heavy, overlay, &rmtx[0], &tmtx[0]);

    for (OEIter<OEConfBase> ci = mol.GetConfs(); ci; ++ci)
    {
      const unsigned int cidx = ci->GetIdx();
      // %u: cidx is unsigned.
      OEThrow.Info("Conformer %u : rmsd = %.4f", cidx, rmsds[cidx]);
      if (overlay)
      {
        OERotate(*ci, &rmtx[0] + cidx * 9);
        OETranslate(*ci, &tmtx[0] + cidx * 3);
      }
    }
    if (writeOutput)
      OEWriteMolecule(ofs, mol);
  }

  return 0;
}

See also

Modifying SD tags

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Modifies the SD data of a set of input molecules by clearing all tags,
* defining which tags to keep or defining which tags to remove
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "sdfmodprops.itf"

using namespace OESystem;
using namespace OEChem;

// Copies every molecule from ifs to ofs after clearing all of its SD data.
static void ClearProps(oemolistream& ifs,
                       oemolostream& ofs)
{
  for (OEGraphMol mol; OEReadMolecule(ifs, mol); )
  {
    OEClearSDData(mol);
    OEWriteMolecule(ofs, mol);
  }
}

// Copies every molecule from ifs to ofs, deleting each SD tag that is not
// named in proplist.
static void KeepProps(OEIter<const std::string>& proplist,
                      oemolistream& ifs,
                      oemolostream& ofs)
{
  // Gather the tags to keep into a set for lookup.
  std::set<std::string> wanted;
  proplist.ToFirst();
  while (proplist)
  {
    wanted.insert(*proplist);
    ++proplist;
  }

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    for (OEIter<OESDDataPair> entry = OEGetSDDataPairs(mol); entry; ++entry)
    {
      const bool keep = wanted.count(entry->GetTag()) != 0;
      if (!keep)
        OEDeleteSDData(mol, entry->GetTag());
    }

    OEWriteMolecule(ofs, mol);
  }
}

// Copies every molecule from ifs to ofs after deleting each SD tag named in
// proplist. The list iterator is rewound for every molecule.
static void RemoveProps(OEIter<const std::string>& proplist,
                        oemolistream& ifs,
                        oemolostream& ofs)
{
  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    proplist.ToFirst();
    while (proplist)
    {
      OEDeleteSDData(mol, *proplist);
      ++proplist;
    }

    OEWriteMolecule(ofs, mol);
  }
}

// Dispatches to the SD-data operation selected on the command line.
// -keep is tested first, then -remove, then -clearAll; only the first one
// that applies is executed.
static void ModProps(OEInterface& itf,
                     oemolistream& ifs,
                     oemolostream& ofs)
{
  if (itf.Has<std::string>("-keep"))
  {
    OEIter<const std::string> keepTags = itf.GetList<std::string>("-keep");
    KeepProps(keepTags, ifs, ofs);
    return;
  }

  if (itf.Has<std::string>("-remove"))
  {
    OEIter<const std::string> dropTags = itf.GetList<std::string>("-remove");
    RemoveProps(dropTags, ifs, ofs);
    return;
  }

  if (itf.Get<bool>("-clearAll"))
  {
    ClearProps(ifs, ofs);
  }
}

// Entry point: requires exactly one of -keep, -remove or -clearAll, opens
// the -i and -o files (both must use a format that carries SD data) and
// applies the selected modification to every molecule.
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  // Count how many of the three mutually exclusive modes were requested.
  unsigned int numoption = 0;
  numoption += itf.Has<std::string>("-keep") ? 1u : 0u;
  numoption += itf.Has<std::string>("-remove") ? 1u : 0u;
  numoption += itf.Get<bool>("-clearAll") ? 1u : 0u;

  if (numoption != 1)
  {
    OEThrow.Usage("Need to pick one from -keep, -remove, or -clearAll");
  }

  const std::string inname = itf.Get<std::string>("-i");
  oemolistream ifs;
  if (!ifs.open(inname))
  {
    OEThrow.Fatal("Unable to open %s for reading", inname.c_str());
  }
  if (!OEIsSDDataFormat(ifs.GetFormat()))
  {
    OEThrow.Fatal("Only works for input file formats that support SD data (sdf,oeb,csv)");
  }

  const std::string outname = itf.Get<std::string>("-o");
  oemolostream ofs;
  if (!ofs.open(outname))
  {
    OEThrow.Fatal("Unable to open %s for writing", outname.c_str());
  }
  if (!OEIsSDDataFormat(ofs.GetFormat()))
  {
    OEThrow.Fatal("Only works for output file formats that support SD data (sdf,oeb,csv)");
  }

  ModProps(itf, ifs, ofs);

  return 0;
}

See also

Exporting SD data to a csv file

/****************************************************************************
 Copyright (C) 2002-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Extract properties from SD file and save as CSV
****************************************************************************/
#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

// Writes the SD data of every molecule in ifs to csv as comma-separated
// rows. The stream is read twice: once to collect the distinct tags (in
// order of first appearance) for the header row, and again after a rewind
// to emit one row per molecule.
static void SDF2CSV(oemolistream& ifs, oeofstream& csv)
{
  typedef std::vector<std::string> TagList;

  TagList tags;
  OEGraphMol mol;

  // Pass 1: collect each tag the first time it is seen.
  while (OEReadMolecule(ifs, mol))
  {
    for (OEIter<OESDDataPair> dp = OEGetSDDataPairs(mol); dp; ++dp)
    {
      const bool known =
        std::find(tags.begin(), tags.end(), dp->GetTag()) != tags.end();
      if (!known)
        tags.push_back(dp->GetTag());
    }
  }

  // Header row: "Title" followed by every tag.
  csv << "Title";
  for (TagList::iterator col = tags.begin(); col != tags.end(); ++col)
  {
    csv << ',' << *col;
  }
  csv << oeendl;

  // Pass 2: one row per molecule, one cell per tag.
  ifs.rewind();
  while (OEReadMolecule(ifs, mol))
  {
    csv << mol.GetTitle();
    for (TagList::iterator col = tags.begin(); col != tags.end(); ++col)
    {
      csv << ',' << OEGetSDData(mol, (*col));
    }
    csv << oeendl;
  }
}

int main(int argc, char *argv[])
{
  if (argc != 3)
    OEThrow.Usage("%s <infile> <csvfile>", argv[0]);

  oemolistream ifs;
  if (!ifs.open(argv[1]))
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);
  unsigned int fmt = ifs.GetFormat();
  if (fmt != OEFormat::SDF && fmt != OEFormat::OEB)
    OEThrow.Fatal("Only works for sdf or oeb input files");

  oeofstream csv;
  if (!csv.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for writing", argv[2]);

  SDF2CSV(ifs, csv);

  return 0;
}

See also

Adding csv data as SD tags

/****************************************************************************
 Copyright (C) 2006-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Merge a CSV file of data/properties, key on compound name in first column
* and use column titles as keys.  All data is read/written as strings
****************************************************************************/
#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

// Attaches CSV data to molecules as SD tags.
//
// csv  CSV stream; the first line is the header. The first column holds the
//      molecule title and the remaining header cells are used as SD tag
//      names.
// ifs  molecules to annotate
// ofs  receives every input molecule, annotated when its title has a row in
//      the CSV
//
// Rows with an empty title are skipped with a warning; when the same title
// occurs more than once the last row wins. Empty cells are not written as
// SD data. All values are handled as strings.
static void CSV2SDF(oeifstream& csv, oemolistream& ifs, oemolostream& ofs)
{
  std::string line;
  csv.getline(line);

  std::vector<std::string>  propnames;
  bool concat_delimiters = false;
  OEStringTokenize(propnames, line, ",", concat_delimiters);
  // Drop the title column. An empty or unreadable header yields no tokens,
  // and erasing begin() of an empty vector is undefined behaviour.
  if (!propnames.empty())
    propnames.erase(propnames.begin());

  // title -> data cells of that row (title column removed)
  std::map<std::string, std::vector<std::string> >  values;
  while (csv && csv.getline(line))
  {
    std::vector<std::string>  tokens;
    if (OEStringTokenize(tokens, line, ",", concat_delimiters) && tokens.size() > 1)
    {
      std::string title = tokens[0];
      if (title.empty())
      {
        OEThrow.Warning("Skipping entry with no title");
        continue;
      }
      tokens.erase(tokens.begin());
      values[title] = tokens;
    }
  }

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    std::map< std::string, std::vector<std::string> >::iterator vi = values.find(mol.GetTitle());
    if (vi != values.end())
    {
      const std::vector<std::string>& val = vi->second;
      // Stop at whichever is shorter: the row or the header.
      for (unsigned int p = 0; p < val.size() && p < propnames.size(); ++p)
      {
        if (!val[p].empty())
          OESetSDData(mol, propnames[p], val[p]);
      }
    }
    OEWriteMolecule(ofs, mol);
  }
}

int main(int argc, char* argv[])
{
  if (argc != 4)
    OEThrow.Usage("%s <csvfile> <infile> <outsdfile>", argv[0]);

  oeifstream csv;
  if (!csv.open(argv[1]))
    OEThrow.Fatal("Unable to open %s csv for reading", argv[1]);

  oemolistream ifs;
  if (!ifs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for reading", argv[2]);

  oemolostream ofs;
  if (!ofs.open(argv[3]))
    OEThrow.Fatal("Unable to open %s for writing", argv[3]);
  unsigned int fmt = ofs.GetFormat();
  if (fmt != OEFormat::SDF && fmt != OEFormat::OEB)
    OEThrow.Fatal("Only works for sdf or oeb output files");

  CSV2SDF(csv, ifs, ofs);

  return 0;
}

See also

Renaming molecules by SD field

/****************************************************************************
 Copyright (C) 2003-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Rename SDF molecules by specified field
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OESystem;
using namespace OEChem;

// Copies every molecule from ifs to ofs, replacing its title with the value
// of the SD field `fieldname` when the molecule has that field. Molecules
// without the field keep their title and trigger a warning.
static void Rename(oemolistream& ifs, oemolostream& ofs, const std::string &fieldname)
{
  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    const bool hasField = OEHasSDData(mol, fieldname);
    if (!hasField)
    {
      OEThrow.Warning("Renaming of molecule %s failed; no field %s", mol.GetTitle(), fieldname.c_str());
    }
    else
    {
      mol.SetTitle(OEGetSDData(mol, fieldname));
    }
    OEWriteMolecule(ofs, mol);
  }
}

int main(int argc, char *argv[])
{
  if (argc != 4)
    OEThrow.Usage("%s <fieldname> <infile> <outfile>", argv[0]);

  std::string fieldname = argv[1];

  oemolistream ifs;
  if (!ifs.open(argv[2]))
    OEThrow.Fatal("Unable to open %s for reading", argv[2]);
  if (!OEIsSDDataFormat(ifs.GetFormat()))
    OEThrow.Fatal("Only works for input file formats that support SD data (sdf,oeb,csv)");

  oemolostream ofs;
  if (!ofs.open(argv[3]))
    OEThrow.Fatal("Unable to open %s for writing", argv[3]);

  Rename(ifs, ofs, fieldname);

  return 0;
}

See also

Filter molecules by SD data

/****************************************************************************
 Copyright (C) 2010-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Filter molecules by SD data
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "sdfilter.itf"

#include <limits>

using namespace OESystem;
using namespace OEChem;

// Entry point: copies to -o every molecule of -i whose numeric SD value
// under -tag lies within [-min, -max]. At least one of the two bounds has
// to be given; a missing bound places no restriction on that side.
// Molecules without the tag, or whose value does not parse as a number, are
// skipped with a warning.
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  if (!(itf.Has<double>("-min") || itf.Has<double>("-max")))
    OEThrow.Fatal("Please set a filter value with -min or -max");

  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open %s for reading", itf.Get<std::string>("-i").c_str());
  if (!OEIsSDDataFormat(ifs.GetFormat()))
    OEThrow.Fatal("Only works for input file formats that support SD data (sdf,oeb,csv)");

  oemolostream ofs;
  if (!ofs.open(itf.Get<std::string>("-o")))
    OEThrow.Fatal("Unable to open %s for writing", itf.Get<std::string>("-o").c_str());
  if (!OEIsSDDataFormat(ofs.GetFormat()))
    OEThrow.Fatal("Only works for output file formats that support SD data (sdf,oeb,csv)");

  std::string tag = itf.Get<std::string>("-tag");

  // numeric_limits<double>::min() is the smallest POSITIVE normalized
  // double, not the most negative value. Using it as the default lower
  // bound rejected every zero or negative tag value whenever only -max was
  // supplied. -max() is the most negative finite double.
  double minval = -std::numeric_limits<double>::max();
  if (itf.Has<double>("-min"))
    minval = itf.Get<double>("-min");

  double maxval = std::numeric_limits<double>::max();
  if (itf.Has<double>("-max"))
    maxval = itf.Get<double>("-max");

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    if (!OEHasSDData(mol, tag))
    {
      OEThrow.Warning("Unable to find %s tag on %s", tag.c_str(), mol.GetTitle());
      continue;
    }

    std::string value = OEGetSDData(mol, tag);
    double tagvalue;
    if (!OEStringToNumber(value, tagvalue))
    {
      OEThrow.Warning("Failed to convert (%s) to a number in %s", value.c_str(), mol.GetTitle());
      continue;
    }

    if (tagvalue < minval)
      continue;

    if (tagvalue > maxval)
      continue;

    OEWriteMolecule(ofs, mol);
  }

  return 0;
}

See also

Counting molecules

/****************************************************************************
 Copyright (C) 2002-2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Counts molecules (and conformers) in input files
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "molcount.itf"

using namespace OESystem;
using namespace OEChem;

// Entry point: counts the molecules in every -i file and prints a per-file
// line plus a grand total. With -conf the number of conformers and the
// average number of conformers per molecule are reported as well.
// Files that cannot be opened are skipped with a warning.
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const bool confs = itf.Get<bool>("-conf");

  unsigned long total = 0;
  unsigned long totconfs = 0;
  unsigned long nfiles = 0;
  for (OEIter<const std::string> i = itf.GetList<std::string>("-i"); i; ++i)
  {
    const std::string filename = *i;
    oemolistream ifs;
    if (!ifs.open(filename))
    {
      OEThrow.Warning("Unable to open %s for reading", filename.c_str());
      continue;
    }

    nfiles++;
    unsigned long count = 0;
    unsigned long nconfs = 0;

    OEMol mol;
    while (OEReadMolecule(ifs, mol))
    {
      count++;
      if (confs)
        nconfs += mol.NumConfs();
    }

    std::cout << filename << " contains " << count << " molecule(s)." << std::endl;
    if (confs)
    {
      // A file without molecules has no meaningful average; report 0
      // instead of dividing by zero.
      const float average = (count > 0)
        ? static_cast<float>(nconfs) / static_cast<float>(count)
        : 0.0f;
      std::cout << "Total # of conformers:   " << nconfs << std::endl;
      std::cout << "Average # of conformers: " << average << std::endl;
      std::cout << "-----------------------------------------------------------" << std::endl;
    }

    total += count;
    totconfs += nconfs;
  }

  std::cout << "===========================================================" << std::endl;
  std::cout << "Total " << total << " molecules" << std::endl;
  if (confs && (nfiles > 0) )
  {
    // Same guard for the grand total: every opened file may have been empty.
    const float average = (total > 0)
      ? static_cast<float>(totconfs) / static_cast<float>(total)
      : 0.0f;
    std::cout << "Total # of conformers:   " << totconfs << std::endl;
    std::cout << "Average # of conformers: " << average << std::endl;
  }

  return 0;
}

Get molecule titles

/****************************************************************************
 Copyright (C) 2010, 2011 OpenEye Scientific Software, Inc.
*****************************************************************************
* Output all molecule titles
****************************************************************************/
#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

// Entry point: prints the title of every molecule in <infile>, one per
// line, to <outfile> when given and otherwise to the stream initialised
// from oeout. Molecules with an empty title are reported as "untitled".
int main(int argc, char *argv[])
{
  const bool argsOk = (argc == 2) || (argc == 3);
  if (!argsOk)
  {
    OEThrow.Usage("%s <infile> [<outfile>]", argv[0]);
  }

  oemolistream ifs;
  if (!ifs.open(argv[1]))
  {
    OEThrow.Fatal("Unable to open %s for reading", argv[1]);
  }

  oeofstream ofs = oeout;
  if (argc > 2)
  {
    if (!ofs.open(argv[2]))
    {
      OEThrow.Fatal("Unable to open %s for writing", argv[2]);
    }
  }

  OEMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    std::string label = mol.GetTitle();
    if (label.empty())
    {
      label = "untitled";
    }
    ofs << label << oeendl;
  }

  return 0;
}

See also

Find minimum path in a molecule

/****************************************************************************
 Copyright (C) 2010, 2011, 2014 OpenEye Scientific Software, Inc.
*****************************************************************************
* Find the minimum path length between 2 smarts patterns
* or the path length between 2 named atoms
****************************************************************************/
#include <openeye.h>
#include <oesystem.h>
#include <oechem.h>
#include "minpath.itf"

using namespace OESystem;
using namespace OEChem;
using namespace std;

// For every molecule in ifs: assigns Tripos atom names, locates the atoms
// named atm1 and atm2, and reports the path between them.
// The path length is printed when -verbose is set or no -o file was given.
// The subset molecule built from the shortest path is written to ofs when
// -o was given; otherwise its SMILES is printed if -verbose is set.
// Molecules in which either atom name is missing are skipped with a warning.
static void AtomPathLength(oemolistream& ifs, oemolostream& ofs, OEInterface& itf,
                           string& atm1, string& atm2)
{
  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    OETriposAtomNames(mol);

    // Scan the atoms until both names have been found.
    OEAtomBase* first = NULL;
    OEAtomBase* second = NULL;
    for (OEIter<OEAtomBase> atm = mol.GetAtoms(); atm && !(first && second); ++atm)
    {
      if (atm->GetName() == atm1)
        first = atm;
      if (atm->GetName() == atm2)
        second = atm;
    }

    if (!first || !second)
    {
      OEThrow.Warning("Failed to find atoms %s and %s in molecule", atm1.c_str(), atm2.c_str());
      continue;
    }

    const unsigned int npath = OEGetPathLength(first, second);
    if (itf.Get<bool>("-verbose") || !itf.Has<string>("-o"))
    {
      string molsmi;
      OECreateIsoSmiString(molsmi, mol);
      cout << "Path length: " << npath << " in " << molsmi << endl;
    }

    // Extract the atoms on the shortest path as a molecule of their own.
    OEIter<OEAtomBase> pathAtoms = OEShortestPath(first, second);
    OEGraphMol pathMol;
    const bool adjustHCount = true;
    OESubsetMol(pathMol, mol, OEIsAtomMember(pathAtoms), adjustHCount);
    string pathSmi;
    OECreateIsoSmiString(pathSmi, pathMol);

    if (itf.Has<std::string>("-o"))
    {
      OEWriteMolecule(ofs, pathMol);
    }
    else if (itf.Get<bool>("-verbose"))
    {
      cout << pathSmi << endl;
    }
  }
}

// For every molecule in ifs that matches both ss1 and ss2: finds the
// smallest OEGetPathLength between any atom of an ss1 match and any atom of
// an ss2 match, and reports the corresponding shortest path(s).
// The length is printed when -verbose is set or no -o file was given.
// Each distinct path (compared by its SMILES string) is written to ofs when
// -o was given; otherwise it is printed if -verbose is set.
// Molecules that fail to match either pattern are skipped with a warning.
static void SmartsPathLength(oemolistream& ifs, oemolostream& ofs, OEInterface& itf,
                             OESubSearch& ss1, OESubSearch& ss2)
{
  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    OEPrepareSearch(mol, ss1);
    OEPrepareSearch(mol, ss2);
    if (!(ss1.SingleMatch(mol) && ss2.SingleMatch(mol)))
    {
      OEThrow.Warning("Unable to find SMARTS matches in %s, skipping", mol.GetTitle());
      continue;
    }

    // allatompairs collects every atom pair whose path length equals the
    // smallest length seen over all match combinations (allminlen).
    vector<pair<OEAtomBase*, OEAtomBase*> > allatompairs;
    bool unique = true;
    unsigned int allminlen = std::numeric_limits<unsigned int>::max();
    for (OEIter<OEMatchBase> match1 = ss1.Match(mol, unique); match1; ++match1)
    {
      for (OEIter<OEMatchBase> match2 = ss2.Match(mol, unique); match2; ++match2)
      {
        // atompairs/minlen track the closest pair(s) for this one
        // combination of matches.
        vector<pair<OEAtomBase*, OEAtomBase*> > atompairs;
        unsigned int minlen = std::numeric_limits<unsigned int>::max();
        for(OEIter<OEAtomBase> ai1 = match1->GetTargetAtoms(); ai1; ++ai1)
        {
          OEAtomBase *atom1 = ai1;
          for(OEIter<OEAtomBase> ai2 = match2->GetTargetAtoms(); ai2; ++ai2)
          {
            OEAtomBase *atom2 = ai2;
            unsigned int pathlen = OEGetPathLength(atom1, atom2);
            if (minlen > pathlen)
            {
              // strictly shorter: restart the list with this pair
              minlen = pathlen;
              atompairs.clear();
              atompairs.push_back(make_pair(atom1, atom2));
            }
            else if (minlen == pathlen)
              atompairs.push_back(make_pair(atom1, atom2));
          }
        }
        // Fold this combination into the overall minimum: replace on a
        // strictly shorter length, append on a tie.
        if (minlen < allminlen)
        {
          allminlen = minlen;
          allatompairs = atompairs;
        }
        else if (minlen == allminlen)
          allatompairs.insert(allatompairs.end(), atompairs.begin(), atompairs.end());
      }
    }

    if (itf.Get<bool>("-verbose") || !itf.Has<string>("-o"))
    {
      string smiles;
      OECreateIsoSmiString(smiles, mol);
      cout << "Shortest path length: " << allminlen << " in " << smiles << endl;
    }

    // Emit each minimal path once; spathlist holds the SMILES strings
    // already emitted so repeats are skipped.
    vector<pair<OEAtomBase*, OEAtomBase*> >::iterator v_i;
    set<string> spathlist;
    pair<set<string>::iterator, bool> ret;
    for (v_i = allatompairs.begin(); v_i != allatompairs.end(); ++v_i)
    {
      OEIter<OEAtomBase> spath = OEShortestPath(v_i->first, v_i->second);
      OEGraphMol spathmol;
      OESubsetMol(spathmol, mol, OEIsAtomMember(spath));
      string spathsmiles;
      OECreateIsoSmiString(spathsmiles, spathmol);
      ret = spathlist.insert(spathsmiles);
      if (!ret.second)
        continue; // this SMILES was already emitted

      if (itf.Has<std::string>("-o"))
        OEWriteMolecule(ofs, spathmol);
      else if (itf.Get<bool>("-verbose"))
        cout << spathsmiles << endl;
    }
  }
}

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  if (!((itf.Has<std::string>("-smarts1") && itf.Has<std::string>("-smarts2"))
        ^ (itf.Has<std::string>("-atom1") && itf.Has<std::string>("-atom2"))))
    OEThrow.Fatal("-smarts1 and -smarts2 or -atom1 and -atom2 must be set");

  oemolistream ifs;
  if (!ifs.open(itf.Get<std::string>("-i")))
    OEThrow.Fatal("Unable to open %s for reading", itf.Get<std::string>("-i").c_str());

  oemolostream ofs;
  if (itf.Has<std::string>("-o"))
    if (!ofs.open(itf.Get<std::string>("-o")))
      OEThrow.Fatal("Unable to open %s for writing", itf.Get<std::string>("-o").c_str());

  if (itf.Has<std::string>("-smarts1") && itf.Has<std::string>("-smarts2"))
  {
    OESubSearch ss1;
    OESubSearch ss2;
    std::string smarts1 = itf.Get<std::string>("-smarts1");
    if (!ss1.Init(smarts1.c_str()))
      OEThrow.Fatal("Unable to parse SMARTS1: %s", smarts1.c_str());

    std::string smarts2 = itf.Get<std::string>("-smarts2");
    if (!ss2.Init(smarts2.c_str()))
      OEThrow.Fatal("Unable to parse SMARTS2: %s", smarts2.c_str());

    SmartsPathLength(ifs, ofs, itf, ss1, ss2);
  }
  else
  {
    std::string atom1 = itf.Get<std::string>("-atom1");
    std::string atom2 = itf.Get<std::string>("-atom2");
    AtomPathLength(ifs, ofs, itf, atom1, atom2);
  }

  return 0;
}

See also

Extract ring templates

/****************************************************************************
 Copyright (C) 2014 OpenEye Scientific Software, Inc.
*****************************************************************************
* Extract ring templates for 2D coordinate generation
****************************************************************************/
#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

#include "extractringtemplates.itf"

#include <string>

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  const std::string ifname = itf.Get<std::string>("-in");
  const std::string ofname = itf.Get<std::string>("-out");

  oemolistream ifs;
  if (!ifs.open(ifname))
    OEThrow.Fatal("Unable to open %s for reading", ifname.c_str());

  if (!OEIs2DFormat(ifs.GetFormat()))
    OEThrow.Fatal("Invalid input format: need 2D coordinates");

  oemolostream ofs;
  if (!ofs.open(ofname))
    OEThrow.Fatal("Unable to open %s for writing", ofname.c_str());

  if (!OEIs2DFormat(ofs.GetFormat()))
    OEThrow.Fatal("Invalid output format: unable to write 2D coordinates");

  unsigned int nrrings = 0;
  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    for (OEIter<OEMolBase> ri = OEExtractRingTemplates(mol); ri; ++ri, ++nrrings)
      OEWriteMolecule(ofs, *ri);
  }
  OEThrow.Info("%d number of ring templates extracted", nrrings);

  return 0;
}

Create 2D ring dictionary

/****************************************************************************
 Copyright (C) 2015 OpenEye Scientific Software, Inc.
*****************************************************************************
* Creates a new 2D ring dictionary
****************************************************************************/

#include <openeye.h>

#include <string>

#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

#include "createringdict.itf"

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

// Builds a new 2D ring dictionary from the rings of all molecules in the input
// file and writes the dictionary to the requested output file.
//
// Command line options (looked up through the OEInterface):
//   -in        input molecule file; its format has to pass OEIs2DFormat
//   -ringdict  output ring dictionary file; has to be in OEB format
//
// Command line, input and output problems are reported through OEThrow.Fatal.
// Returns 0 once the dictionary has been handed to OEWrite2DRingDictionary.
int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData);

  if (!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  const std::string ifname = itf.Get<std::string>("-in");
  const std::string ofname = itf.Get<std::string>("-ringdict");

  oemolistream ifs;
  if (!ifs.open(ifname))
    OEThrow.Fatal("Unable to open %s for reading!", ifname.c_str());

  if (!OEIs2DFormat(ifs.GetFormat()))
    OEThrow.Fatal("Invalid input file format for 2D coordinates!");

  // The output stream is only used to validate the target up front: the path
  // has to be openable for writing and must map to the OEB format.
  oemolostream ofs;
  if (!ofs.open(ofname))
    OEThrow.Fatal("Unable to open %s for writing!", ofname.c_str());

  if (ofs.GetFormat() != OEFormat::OEB)
    OEThrow.Fatal("Output file has to have OEB format!");

  // Nothing is ever written through ofs. Release it now so that it is not
  // still holding the file when OEWrite2DRingDictionary writes to the same
  // path further down.
  ofs.close();

  OE2DRingDictionary ringdict;

  // dots is updated once per molecule read and totalled after the loop.
  OEDots dots(10000, 100, "molecules");

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    dots.Update();
    ringdict.AddRings(mol);
  }
  dots.Total();

  // The ring count is unsigned, so it is formatted with %u instead of %d.
  const unsigned int nrrings = ringdict.NumRings();
  OEThrow.Info("%u number of ring templates has been extracted!", nrrings);

  OEWrite2DRingDictionary(ofname, ringdict);

  return 0;
}

Append to 2D ring dictionary

/****************************************************************************
 Copyright (C) 2015, 2016 OpenEye Scientific Software, Inc.
*****************************************************************************
* Appends rings to an existing 2D ring dictionary
****************************************************************************/

#include <openeye.h>
#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

#include "appendringdict.itf"

#include <string>

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

// Loads an existing 2D ring dictionary, adds the rings of all molecules in the
// input file to it, and writes the resulting dictionary to a new file.
//
// Command line options (looked up through the OEInterface):
//   -in           input molecule file; its format has to pass OEIs2DFormat
//   -inringdict   existing ring dictionary; has to pass
//                 OEIsValid2DRingDictionary
//   -outringdict  file the extended ring dictionary is written to
//
// Command line and input problems are reported through OEThrow.Fatal.
// Returns 0 once the dictionary has been handed to OEWrite2DRingDictionary.
int main(int argc, char* argv[])
{
  OEInterface itf(InterfaceData);

  if (!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  const std::string ifname = itf.Get<std::string>("-in");
  const std::string irdfname = itf.Get<std::string>("-inringdict");
  const std::string ordfname = itf.Get<std::string>("-outringdict");

  oemolistream ifs;
  if (!ifs.open(ifname))
    OEThrow.Fatal("Unable to open %s for reading!", ifname.c_str());

  if (!OEIs2DFormat(ifs.GetFormat()))
    OEThrow.Fatal("Invalid input file format for 2D coordinates!");

  if (!OEIsValid2DRingDictionary(irdfname))
    OEThrow.Fatal("Invalid ring dictionary file!");

  OE2DRingDictionary ringdict(irdfname);

  // Remember the size of the dictionary as loaded, so that the number of
  // rings added by this run can be reported afterwards.
  const unsigned int nrrings = ringdict.NumRings();

  // dots is updated once per molecule read and totalled after the loop.
  OEDots dots(10000, 100, "molecules");

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
    dots.Update();
    ringdict.AddRings(mol);
  }

  dots.Total();

  // The difference is unsigned, so it is formatted with %u instead of %d.
  const unsigned int nrnewrings = ringdict.NumRings() - nrrings;
  OEThrow.Info("%u new ring templates have been added!", nrnewrings);

  OEWrite2DRingDictionary(ordfname, ringdict);

  return 0;
}

Generate 2D coordinates with user-defined ring templates

/****************************************************************************
* Copyright (C) 2015 OpenEye Scientific Software, Inc.
*****************************************************************************
* Generates 2D coordinates using user-defined ring templates
****************************************************************************/

#include <openeye.h>

#include <string.h>

#include <oeplatform.h>
#include <oesystem.h>
#include <oechem.h>

#include "generate2D.itf"

using namespace std;
using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;

int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData);

  if(!OEParseCommandLine(itf, argc, argv))
    OEThrow.Fatal("Unable to interpret command line!");

  const string ifname = itf.Get<string>("-in");
  const string ofname = itf.Get<string>("-out");

  oemolistream ifs;
  if (!ifs.open(ifname))
    OEThrow.Fatal("Unable to open %s for reading!", ifname.c_str());

  oemolostream ofs;
  if (!ofs.open(ofname))
    OEThrow.Fatal("Unable to open %s for writing!", ofname.c_str());

  if (!OEIs2DFormat(ofs.GetFormat()))
    OEThrow.Fatal("Invalid output file format for 2D coordinates!");

  if (itf.Has<string>("-ringdict"))
  {
    const string rfname = itf.Get<string>("-ringdict");
    OEInit2DRingDictionary(rfname);
  }

  OEGraphMol mol;
  while (OEReadMolecule(ifs, mol))
  {
     OEGenerate2DCoordinates(mol);
     OEWriteMolecule(ofs, mol);
  }

  return 0;
}