Converting Molecules to Names

Converts a file of chemical structures (specified by the -in option) into chemical names (-out option), with a choice of language (-language option), character encoding (-encoding option), and naming style (-style option).

Listing 1: Converting molecules to names

/***********************************************************************
Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009-2011
OpenEye Scientific Software, Inc.
***********************************************************************/
#include "openeye.h"

#include <stdlib.h>
#include <string>

#include "oeplatform.h"  
#include "oesystem.h"
#include "oechem.h"
#include "oeiupac.h"

#include "mol2nam_example.itf"

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;
using namespace OEIUPAC;
using namespace std;

int main(int argc,char *argv[])
{
  OEInterface itf(InterfaceData,argc,argv);
  
  if (!itf.Get<bool>("-nobanner"))
  {
    OEThrow.Info("Lexichem mol2nam");
    OEThrow.Info("OpenEye Scientific Software");
    OEThrow.Info("   Version: %s", OEIUPACGetRelease());
    OEThrow.Info("     Built: %d", OEIUPACGetVersion());
    OEThrow.Info("  Platform: %s\n", OEIUPACGetPlatform());
  }

  unsigned int language=OEGetIUPACLanguage(itf.Get<string>("-language"));
  unsigned int charset=OEGetIUPACCharSet(itf.Get<string>("-encoding"));
  const unsigned char *style=OEGetIUPACNamStyle(itf.Get<string>("-style"));
  bool useDots = itf.Get<bool>("-dots");

  oemolistream ifs(itf.Get<string>("-in"));
  if (!ifs)
    OEThrow.Fatal("Unable to open %s for reading",
                  itf.Get<string>("-in").c_str());

  oemolostream ofs;
  string outname="";
  if (itf.Has<string>("-out"))
  {
    outname=itf.Get<string>("-out");
    if (!ofs.open(outname))
      OEThrow.Fatal("Unable to open %s for writing", outname.c_str());
  }

  OEGraphMol mol;
  std::string tmp;
  OEDots dots(10000, 200, "molecules");
  while (OEReadMolecule(ifs, mol))
  {
    if (useDots)
      dots.Update();

    std::string name = OEIUPAC::OECreateIUPACName(mol,style);

    if (language)
      name = OEIUPAC::OEToLanguage(name.c_str(),language);
    if (itf.Get<bool>("-capitalize"))
      name = OEIUPAC::OECapitalizeName(name.c_str());

    switch (charset)
    {
    case OECharSet::ASCII:
      name = OEIUPAC::OEToASCII(name.c_str());
      break;
    case OECharSet::UTF8:
      name = OEIUPAC::OEToUTF8(name.c_str());
      break;
    case OECharSet::HTML:
      name = OEIUPAC::OEToHTML(name.c_str());
      break;
    case OECharSet::Latin1:
      name = OEIUPAC::OEToLatin1(name.c_str());
      break;
    case OECharSet::SJIS:
      name = OEIUPAC::OEToSJIS(name.c_str());
      break;
    case OECharSet::EUCJP:
      name = OEIUPAC::OEToEUCJP(name.c_str());
      break;
    }
   
    if (!itf.Get<bool>("-suppressName"))
    { mol.SetTitle(name); }
    else
    { OEThrow.Warning("No title generated, please use -tag option"); }

    if (outname.size() > 0)
    {
      if (itf.Has<string>("-delim"))
      {
        const char *title = mol.GetTitle();
        if(title && *title)
        {
          tmp = name;
          name = title;
          name.append(itf.Get<string>("-delim"));
          name.append(tmp);
        }
      }

      if (itf.Has<string>("-tag"))
        OESetSDData(mol,itf.Get<string>("-tag"),name);

      OEWriteMolecule(ofs, mol);
    }
    else printf("%s\n",name.c_str());
  }

  if (useDots)
    dots.Total();

  return 0;
}

Converting Names to Molecules

Converts a file of chemical names (specified by the -in option) of a specific language (-language option) into a file of chemical structures (specified by the -out option).

Listing 2: Converting names to molecules

/***********************************************************************
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009-2011
OpenEye Scientific Software, Inc.
***********************************************************************/
#include "openeye.h"

#include "oeplatform.h"
#include "oesystem.h"
#include "oechem.h"
#include "oeiupac.h"

#include "nam2mol_example.itf"

using namespace OEPlatform;
using namespace OESystem;
using namespace OEChem;
using namespace OEIUPAC;
using namespace std;

#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif

// nam2mol example entry point.
//
// Reads one chemical name per line from -in ("-" means stdin), normalizes it
// (index-name reordering, charset decoding, lower-casing, translation from
// the -language option) and parses it into a molecule. Each parsed molecule
// is written to -out with the original input line as its title and,
// if -tag is given, as SD data under that tag. With -empty, names that fail
// to parse are written as empty molecules instead of being skipped.
int main(int argc, char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  if (!itf.Get<bool>("-nobanner"))
  {
    OEThrow.Info("Lexichem nam2mol");
    OEThrow.Info("OpenEye Scientific Software");
    OEThrow.Info("   Version: %s", OEIUPACGetRelease());
    OEThrow.Info("     Built: %d", OEIUPACGetVersion());
    OEThrow.Info("  Platform: %s\n", OEIUPACGetPlatform());
  }

  // --- input stream: a named file, or stdin when the name is "-" ---
  oeifstream infile;
  string inname = itf.Get<string>("-in");
  if (inname == "-")
  {
    if (!infile.openfd(STDIN_FILENO, true))
      OEThrow.Fatal("Unable to read from stdin");
  }
  else if (!infile.open(inname))
  {
    OEThrow.Fatal("Unable to open input file: %s\n", inname.c_str());
  }

  // --- output molecule stream ---
  oemolostream outfile;
  if (!outfile.open(itf.Get<string>("-out")))
  {
    OEThrow.Fatal("Unable to create output file: %s\n",
                  itf.Get<string>("-out").c_str());
  }

  unsigned int language = OEGetIUPACLanguage(itf.Get<string>("-language"));
  unsigned int charset = OEGetIUPACCharSet(itf.Get<string>("-encoding"));
  bool useDots = itf.Get<bool>("-dots");

  OEGraphMol mol;
  char line[8192];
  OEDots dots(10000, 200, "names");

  while (infile.getline(line, 8192))
  {
    if (useDots)
      dots.Update();

    mol.Clear();

    // Speculatively reorder CAS permuted index names; an empty result means
    // the raw line is used unchanged.
    std::string text = OEReorderIndexName(line);
    if (text.empty())
      text.assign(line);

    // Decode the input encoding; any other charset is passed through as-is.
    switch (charset)
    {
    case OECharSet::UTF8:
      text = OEFromUTF8(text.c_str());
      break;
    case OECharSet::HTML:
      text = OEFromHTML(text.c_str());
      break;
    case OECharSet::Latin1:
      text = OEFromLatin1(text.c_str());
      break;
    default:
      break;
    }

    text = OELowerCaseName(text.c_str());

    if (language != OELanguage::AMERICAN)
      text = OEFromLanguage(text.c_str(), language);

    bool parsed = OEParseIUPACName(mol, text.c_str());

    // With -empty, a failed parse still produces an (empty) output record.
    if (!parsed && itf.Get<bool>("-empty"))
    {
      mol.Clear();
      parsed = true;
    }

    if (!parsed)
      continue;

    // The tag and title carry the original input line, not the normalized text.
    if (itf.Has<string>("-tag"))
      OESetSDData(mol, itf.Get<string>("-tag"), line);
    mol.SetTitle(line);
    OEWriteMolecule(outfile, mol);
  }

  if (useDots)
    dots.Total();

  return 0;
}

Translating Names Between Languages

Translates a file of chemical names (specified by the -in option) in a specific language (-from option) into a file of names (specified by the -out option) in another language (-to option).

Listing 3: Translate names between languages

/***********************************************************************
 Copyright (C) 2005-2014
 OpenEye Scientific Software, Inc.

 Translates between languages.  Internally LexichemTK uses American
 English so it will convert to/from that as an intermediate
 representation.

 By default the program inputs/outputs the internal LexichemTK
 character set representation.  Optionally one can convert the
 input or output to alternate encodings, e.g. HTML or UTF8.

 ***********************************************************************/
#include "openeye.h"

#include "string"

#include "oeplatform.h"
#include "oesystem.h"
#include "oeiupac.h"

#include "translate_example.itf"

using namespace OEPlatform;
using namespace OESystem;
using namespace OEIUPAC;

#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif

#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif

#define MAXBUF 8192

// Translates chemical names line by line from `infile` to `outfile`.
//
// infile        - source of names, one per line (lines are read into a
//                 MAXBUF-sized buffer).
// outfile       - destination for the translated names, one per line.
// from_language - OELanguage constant of the input names.
// to_language   - OELanguage constant of the output names.
// from_charset  - OECharSet constant describing the input encoding.
// to_charset    - OECharSet constant selecting the output encoding.
// debug         - when true, each input line is echoed to `outfile` as a
//                 "# "-prefixed line before its translation.
//
// Lines starting with '#' are copied to the output unchanged. All other
// lines are decoded, lower-cased, converted to American English (the
// intermediate representation), converted to the target language and finally
// encoded in the target charset. HTML output gets a trailing "<br>".
static void Translate(oeifstream &infile, oeofstream &outfile,
                      unsigned int from_language, unsigned int to_language,
                      unsigned int from_charset, unsigned int to_charset,
                      bool debug)
{
  char buffer[MAXBUF];
  std::string str;

  while (infile.getline(buffer, MAXBUF))
  {
    // Comment lines pass through untouched.
    if (buffer[0] == '#')
    {           
      outfile << buffer << oeendl;
      continue;
    }

    // Echo the *current* input line. Echoing `str` here would print the
    // previous line's translated result (or nothing on the first line),
    // because `str` has not been assigned for this iteration yet.
    if (debug)
      outfile << "# " << buffer << oeendl;
      
    str = buffer;

    // Convert from Charset to internal representation
    switch (from_charset)
    {
      case OECharSet::UTF8:
        str = OEIUPAC::OEFromUTF8(str.c_str());
        break;
      case OECharSet::HTML:
        str = OEIUPAC::OEFromHTML(str.c_str());
        break;
      case OECharSet::Latin1:
        str = OEIUPAC::OEFromLatin1(str.c_str());
        break;

      // These are NO-OPS
      case OECharSet::ASCII:
      case OECharSet::DEFAULT:
      default:
        break;
    }

    // Translation functions all operate on lowercase names
    str = OEIUPAC::OELowerCaseName(str.c_str());

    if (from_language != OELanguage::AMERICAN)
      str = OEFromLanguage(str.c_str(),from_language);

    // At this point the name is American English in the
    // LexichemTK default internal character set representation.

    // Convert to output language
    if (to_language != OELanguage::AMERICAN)
      str = OEToLanguage(str.c_str(),to_language);

    // Convert to output charset
    switch (to_charset)
    {
      case OECharSet::ASCII:
        str = OEIUPAC::OEToASCII(str.c_str());
        break;

      case OECharSet::UTF8:
        str = OEIUPAC::OEToUTF8(str.c_str());
        break;

      case OECharSet::HTML:
        // One name per line in HTML needs an explicit line break.
        str = OEIUPAC::OEToHTML(str.c_str()) + "<br>";
        break;

      case OECharSet::Latin1:
        str = OEIUPAC::OEToLatin1(str.c_str());
        break;

      case OECharSet::SJIS:
        str = OEIUPAC::OEToSJIS(str.c_str());
        break;

      case OECharSet::EUCJP:
        str = OEIUPAC::OEToEUCJP(str.c_str());
        break;

      // Any other charset: emit the internal representation unchanged.
      default:
        break;
    }
    outfile << str << oeendl;
  }
}

// translate example entry point.
//
// Resolves the source/target language (-from / -to) and character set
// (-from_charset / -to_charset) options, opens the -in and -out streams
// ("-" selects stdin / stdout respectively) and hands them to Translate().
int main(int argc,char *argv[])
{
  OEInterface itf(InterfaceData, argc, argv);

  // Resolve option strings to the toolkit's numeric constants.
  unsigned int srcLanguage = OEGetIUPACLanguage(itf.Get<std::string>("-from"));
  unsigned int dstLanguage = OEGetIUPACLanguage(itf.Get<std::string>("-to"));
  unsigned int srcCharset =
    OEGetIUPACCharSet(itf.Get<std::string>("-from_charset"));
  unsigned int dstCharset =
    OEGetIUPACCharSet(itf.Get<std::string>("-to_charset"));

  bool debug = itf.Get<bool>("-debug");

  if (!itf.Get<bool>("-nobanner"))
  {
    OEThrow.Info("Lexichem translate");
    OEThrow.Info("OpenEye Scientific Software");
    OEThrow.Info("   Version: %s", OEIUPACGetRelease());
    OEThrow.Info("     Built: %d", OEIUPACGetVersion());
    OEThrow.Info("  Platform: %s\n", OEIUPACGetPlatform());
  }

  // --- input: a named file, or stdin when the name is "-" ---
  oeifstream infile;
  std::string inname = itf.Get<std::string>("-in");
  if (inname == "-")
  {
    if (!infile.openfd(STDIN_FILENO, true))
      OEThrow.Fatal("Unable to read from stdin");
  }
  else if (!infile.open(inname))
  {
    OEThrow.Fatal("Unable to open input file: %s\n", inname.c_str());
  }

  // --- output: a named file, or stdout when the name is "-" ---
  oeofstream outfile;
  std::string outname = itf.Get<std::string>("-out");
  if (outname == "-")
  {
    if (!outfile.openfd(STDOUT_FILENO, true))
      OEThrow.Fatal("Unable to write to stdout");
  }
  else if (!outfile.open(outname))
  {
    OEThrow.Fatal("Unable to open output file: %s\n", outname.c_str());
  }

  Translate(infile, outfile, srcLanguage, dstLanguage,
            srcCharset, dstCharset, debug);

  // Flush the C stdio buffer, then close both toolkit streams explicitly.
  fflush(stdout);
  infile.close();
  outfile.close();

  return 0;
}