Converting Molecules to Names

Converts a file of chemical structures (specified by the -in option) into chemical names (-out option), in a choice of languages (-language option), encodings (-encoding option) and styles (-style option).

Listing 1: Converting molecules to names

/*******************************************************************************
 * Copyright 2010-2016 OpenEye Scientific Software, Inc.
 ******************************************************************************/
using System;
using OpenEye.OEChem;
using OpenEye.OEIUPAC;

/// <summary>
/// Example program: reads molecules from the -in file, generates a chemical
/// name for each one, and either writes the renamed molecules to the -out
/// file or, when no -out is given, prints the names to standard output.
/// </summary>
public class Mol2Nam_example
{
  /// <summary>
  /// Names every molecule read from the input file according to the parsed
  /// command-line options.
  /// </summary>
  /// <param name="itf">Parsed command line. Reads -in, -out, -language,
  /// -charset, -style, -capitalize, -delim and -tag.</param>
  private void Mol2Nam(OEInterface itf)
  {
    oemolistream ifs = new oemolistream();
    if (!ifs.open(itf.GetString("-in")))
      OEChem.OEThrow.Fatal("Unable to open input file: "+itf.GetString("-in"));

    oemolostream ofs = new oemolostream();
    string outname = null;
    if (itf.HasString("-out"))
    {
      outname = itf.GetString("-out");
      if (!ofs.open(outname))
        OEChem.OEThrow.Fatal("Unable to open output file: "+outname);
    }

    uint language = OEIUPAC.OEGetIUPACLanguage(itf.GetString("-language"));
    // Looked up by the declared parameter name; -encoding is its alias.
    uint charset  = OEIUPAC.OEGetIUPACCharSet(itf.GetString("-charset"));
    uint style = OEIUPAC.OEGetIUPACNamStyle(itf.GetString("-style"));

    // The options cannot change while the file is processed, so read them
    // once instead of once per molecule.
    bool capitalize = itf.GetBool("-capitalize");
    string delim = itf.HasString("-delim") ? itf.GetString("-delim") : null;
    string tag = itf.HasString("-tag") ? itf.GetString("-tag") : null;

    OEGraphMol mol = new OEGraphMol();
    try
    {
      while (OEChem.OEReadMolecule(ifs, mol))
      {
        string name = OEIUPAC.OECreateIUPACName(mol, style);

        // NOTE(review): the other listings test against OELanguage.AMERICAN;
        // presumably that constant is 0 - confirm before unifying.
        if (language > 0)
          name = OEIUPAC.OEToLanguage(name, language);

        if (capitalize)
          name = OEIUPAC.OECapitalizeName(name);

        if (charset == OECharSet.ASCII)
          name = OEIUPAC.OEToASCII(name);
        else if (charset == OECharSet.UTF8)
          name = OEIUPAC.OEToUTF8(name);
        else if (charset == OECharSet.HTML)
          name = OEIUPAC.OEToHTML(name);
        else if (charset == OECharSet.SJIS)
          name = OEIUPAC.OEToSJIS(name);
        else if (charset == OECharSet.EUCJP)
          name = OEIUPAC.OEToEUCJP(name);

        if (outname != null)
        {
          // With -delim the original title is kept in front of the name.
          if (delim != null)
            name = mol.GetTitle() + delim + name;

          if (tag != null)
            OEChem.OESetSDData(mol, tag, name);

          mol.SetTitle(name);
          OEChem.OEWriteMolecule(ofs, mol);
        }
        else
          Console.WriteLine(name);
      }
    }
    finally
    {
      // Close explicitly so written molecules are flushed and the file
      // handles are released even if naming throws part-way through.
      if (outname != null)
        ofs.close();
      ifs.close();
    }
  }

  /// <summary>
  /// Program entry point: parses the command line against
  /// <c>interfaceData</c> and runs the conversion.
  /// </summary>
  /// <param name="argv">Command-line arguments.</param>
  public static void Main(string[] argv) 
  {
    Mol2Nam_example app = new Mol2Nam_example();
    OEInterface itf = new OEInterface(interfaceData, "Mol2Nam_example", argv);
    app.Mol2Nam(itf);
  }

  // OEInterface definition of the command-line parameters accepted by
  // this program.
  private static string interfaceData = @"
# mol2nam interface file
!CATEGORY mol2nam

  !CATEGORY I/O
    !PARAMETER -in 1
      !ALIAS -i
      !TYPE string
      !REQUIRED true
      !BRIEF Input filename
      !KEYLESS 1
    !END
    
    !PARAMETER -out 2
      !ALIAS -o
      !TYPE string
      !BRIEF Output filename
      !KEYLESS 2
    !END
  !END
  
  !CATEGORY Lexichem Features
        
    !PARAMETER -language 1
       !ALIAS -lang
       !TYPE string
       !DEFAULT american
       !LEGAL_VALUE american
       !LEGAL_VALUE english
       !LEGAL_VALUE us
    
       !LEGAL_VALUE british
       !LEGAL_VALUE uk

       !LEGAL_VALUE chinese
       !LEGAL_VALUE zh
       !LEGAL_VALUE cn

       !LEGAL_VALUE danish
       !LEGAL_VALUE dk
       !LEGAL_VALUE da

       !LEGAL_VALUE dutch
       !LEGAL_VALUE nl

       !LEGAL_VALUE french
       !LEGAL_VALUE fr

       !LEGAL_VALUE german
       !LEGAL_VALUE de
    
       !LEGAL_VALUE greek
       !LEGAL_VALUE el
    
       !LEGAL_VALUE hungarian
       !LEGAL_VALUE hu

       !LEGAL_VALUE irish
       !LEGAL_VALUE ie
       !LEGAL_VALUE ga

       !LEGAL_VALUE italian
       !LEGAL_VALUE it

       !LEGAL_VALUE japanese
       !LEGAL_VALUE jp
       !LEGAL_VALUE ja

       !LEGAL_VALUE polish
       !LEGAL_VALUE pl

       !LEGAL_VALUE portuguese
       !LEGAL_VALUE pt

       !LEGAL_VALUE romanian
       !LEGAL_VALUE ro

       !LEGAL_VALUE russian
       !LEGAL_VALUE ru

       !LEGAL_VALUE slovak
       !LEGAL_VALUE sk

       !LEGAL_VALUE spanish
       !LEGAL_VALUE es

       !LEGAL_VALUE swedish
       !LEGAL_VALUE se
       !LEGAL_VALUE sv

       !LEGAL_VALUE welsh
       !LEGAL_VALUE cy
       
       !REQUIRED false
       !BRIEF Language for output names.
    !END   
    
    !PARAMETER -style 2
      !ALIAS -namestyle 
      !TYPE string
      !DEFAULT openeye
      !LEGAL_VALUE openeye
      !LEGAL_VALUE iupac
      !LEGAL_VALUE cas
      !LEGAL_VALUE traditional
      !LEGAL_VALUE systematic
      !LEGAL_VALUE casindex 
      !LEGAL_VALUE casidx
      !LEGAL_VALUE autonom
      !LEGAL_VALUE iupac79
      !LEGAL_VALUE iupac93
      !LEGAL_VALUE acdname
      !BRIEF Style of output names
    !END      
  
    !PARAMETER -capitalize 3
       !ALIAS -capitalise
       !TYPE bool
       !DEFAULT false
       !BRIEF Capitalize output names.
    !END

    !PARAMETER -tag 4
       !TYPE string
       !REQUIRED false
       !BRIEF Set name as SD data with tag
    !END

    !PARAMETER -delim 5
       !TYPE string
       !REQUIRED false
       !BRIEF Append name to title using 'delim'
    !END

    !PARAMETER -charset 7
      !ALIAS -encoding
      !TYPE string
      !DEFAULT default
      !REQUIRED false
      !LEGAL_VALUE default
      !LEGAL_VALUE ascii
      !LEGAL_VALUE utf8
      !LEGAL_VALUE html
      !LEGAL_VALUE sjis
      !LEGAL_VALUE eucjp
      !BRIEF Choose charset/encoding for output names.
    !END

  !END

!END
";

}

Converting Names to Molecules

Converts a file of chemical names (specified by the -in option) of a specific language (-language option) into a file of chemical structures (specified by the -out option).

Listing 2: Converting names to molecules

/*******************************************************************************
 * Copyright 2010-2016 OpenEye Scientific Software, Inc.
 ******************************************************************************/
using System;
using OpenEye.OEChem;
using OpenEye.OEIUPAC;

/// <summary>
/// Example program: reads one chemical name per line from the -in file,
/// parses each into a molecule and writes the molecules to the -out file.
/// </summary>
public class Nam2Mol_example
{
  /// <summary>
  /// Converts every line of the input file into a molecule. Names that
  /// cannot be parsed are skipped unless -empty is set, in which case an
  /// empty molecule carrying the name as its title is written instead.
  /// </summary>
  /// <param name="itf">Parsed command line. Reads -in, -out, -language,
  /// -charset, -empty and -tag.</param>
  private void Nam2Mol(OEInterface itf)
  {
    string inname = itf.GetString("-in");
    System.IO.StreamReader ifs = null;
    try
    {
      ifs = new System.IO.StreamReader(inname);
    }
    catch (Exception ex)
    {
      // Report an unreadable input file the same way as an unwritable
      // output file instead of escaping as an unhandled exception.
      OEChem.OEThrow.Fatal("Unable to open input file: " + inname +
                           " (" + ex.Message + ")");
    }

    oemolostream ofs = new oemolostream();
    if (!ofs.open(itf.GetString("-out")))
      OEChem.OEThrow.Fatal("Unable to open output file: " +
                           itf.GetString("-out"));

    uint language = OEIUPAC.OEGetIUPACLanguage(itf.GetString("-language"));
    uint charset = OEIUPAC.OEGetIUPACCharSet(itf.GetString("-charset"));

    // The options cannot change while the file is processed, so read them
    // once instead of once per input line.
    bool writeEmpty = itf.GetBool("-empty");
    string tag = itf.HasString("-tag") ? itf.GetString("-tag") : null;

    OEGraphMol mol = new OEGraphMol();
    try
    {
      string name;
      while ((name = ifs.ReadLine()) != null)
      {
        mol.Clear();

        // Speculatively reorder CAS permuted index names
        string str = OEIUPAC.OEReorderIndexName(name);
        if (str.Length == 0)
          str = name;

        if (charset == OECharSet.HTML)
          str = OEIUPAC.OEFromHTML(str);
        else if (charset == OECharSet.UTF8)
          str = OEIUPAC.OEFromUTF8(str);

        str = OEIUPAC.OELowerCaseName(str);

        if (language != OELanguage.AMERICAN)
          str = OEIUPAC.OEFromLanguage(str,language);

        bool done = OEIUPAC.OEParseIUPACName(mol,str);

        if (!done && writeEmpty)
        {
          // Discard whatever a failed parse left behind before writing.
          mol.Clear();
          done = true;
        }

        if (done)
        {
          // The tag and title carry the line as read, not the
          // normalized string that was handed to the parser.
          if (tag != null)
            OEChem.OESetSDData(mol, tag, name);
          mol.SetTitle(name);
          OEChem.OEWriteMolecule(ofs,mol);
        }
      }
    }
    finally
    {
      // Close explicitly so written molecules are flushed and both file
      // handles are released even if parsing throws part-way through.
      ofs.close();
      if (ifs != null)
        ifs.Dispose();
    }
  }
  
  /// <summary>
  /// Program entry point: parses the command line against
  /// <c>interfaceData</c> and runs the conversion.
  /// </summary>
  /// <param name="argv">Command-line arguments.</param>
  public static void Main(string[] argv) 
  {
    Nam2Mol_example app = new Nam2Mol_example();
    OEInterface itf = new OEInterface(interfaceData, "Nam2Mol_example", argv);
    app.Nam2Mol(itf);
  }

  // OEInterface definition of the command-line parameters accepted by
  // this program.
  private static string interfaceData = @"
# nam2mol interface file
!CATEGORY nam2mol

  !CATEGORY I/O
    !PARAMETER -in 1
      !ALIAS -i
      !TYPE string
      !REQUIRED true
      !BRIEF Input filename
      !KEYLESS 1
    !END
    
    !PARAMETER -out 2
      !ALIAS -o
      !TYPE string
      !REQUIRED true
      !BRIEF Output filename
      !KEYLESS 2
    !END
  !END
  
  !CATEGORY Lexichem Features
        
    !PARAMETER -language 1
       !ALIAS -lang
       !TYPE string
       !DEFAULT american
       !LEGAL_VALUE american
       !LEGAL_VALUE english
       !LEGAL_VALUE us

       !LEGAL_VALUE chinese
       !LEGAL_VALUE zh
       !LEGAL_VALUE cn

       !LEGAL_VALUE danish
       !LEGAL_VALUE dk
       !LEGAL_VALUE da

       !LEGAL_VALUE dutch
       !LEGAL_VALUE nl

       !LEGAL_VALUE french
       !LEGAL_VALUE fr

       !LEGAL_VALUE german
       !LEGAL_VALUE de
    
       !LEGAL_VALUE greek
       !LEGAL_VALUE el
    
       !LEGAL_VALUE hungarian
       !LEGAL_VALUE hu

       !LEGAL_VALUE irish
       !LEGAL_VALUE ie
       !LEGAL_VALUE ga

       !LEGAL_VALUE italian
       !LEGAL_VALUE it

       !LEGAL_VALUE japanese
       !LEGAL_VALUE jp
       !LEGAL_VALUE ja

       !LEGAL_VALUE polish
       !LEGAL_VALUE pl

       !LEGAL_VALUE portuguese
       !LEGAL_VALUE pt

       !LEGAL_VALUE romanian
       !LEGAL_VALUE ro

       !LEGAL_VALUE russian
       !LEGAL_VALUE ru

       !LEGAL_VALUE slovak
       !LEGAL_VALUE sk

       !LEGAL_VALUE spanish
       !LEGAL_VALUE es

       !LEGAL_VALUE swedish
       !LEGAL_VALUE se
       !LEGAL_VALUE sv

       !LEGAL_VALUE welsh
       !LEGAL_VALUE cy
       
       !REQUIRED false
       !BRIEF Language for input names.
     !END   

    !PARAMETER -tag 3
       !TYPE string
       !REQUIRED false
       !BRIEF Set name as SD data with tag
    !END

    !PARAMETER -empty 4
       !TYPE bool
       !DEFAULT false
       !BRIEF Output an empty molecule for unparseable names
    !END

    !PARAMETER -charset 5
       !ALIAS -encoding
       !TYPE string
       !DEFAULT default
       !REQUIRED false
       !LEGAL_VALUE default
       !LEGAL_VALUE ascii
       !LEGAL_VALUE utf8
       !LEGAL_VALUE html
       !BRIEF Choose charset/encoding for input names.
    !END

  !END
!END
";
}

Translating Names Between Languages

Translates a file of chemical names (specified by the -in option) in a specific language (-from option) into a file of names (specified by the -out option) in another language (-to option).

Listing 3: Translate names between languages

/*****************************************************************************
 Copyright 2014-2015 OpenEye Scientific Software, Inc.

 Translates between languages.  Internally LexichemTK uses American
 English so it will convert to/from that as an intermediate
 representation.

 By default the program inputs/outputs the internal LexichemTK
 character set representation.  Optionally one can convert the
 input or output to alternate encodings, eg: HTML or UTF8.

 *****************************************************************************/
using System;
using OpenEye.OEChem;
using OpenEye.OEIUPAC;

/// <summary>
/// Example program: reads one chemical name per line from the -in file,
/// translates it from the -from language to the -to language, and writes
/// the result to the -out file or, when no -out is given, to standard
/// output.
/// </summary>
public class Translate_example
{
  /// <summary>
  /// Translates every line of the input file according to the parsed
  /// command-line options.
  /// </summary>
  /// <param name="itf">Parsed command line. Reads -in, -out, -from, -to,
  /// -from_charset and -to_charset.</param>
  private void Translate(OEInterface itf)
  {
    string inname = itf.GetString("-in");
    System.IO.StreamReader ifs = null;
    try
    {
      ifs = new System.IO.StreamReader(inname);
    }
    catch (Exception ex)
    {
      // Report an unreadable input file the same way as an unwritable
      // output file instead of escaping as an unhandled exception.
      OEChem.OEThrow.Fatal("Unable to open " + inname +
                           " for reading (" + ex.Message + ")");
    }

    // Options are looked up by their declared names; -o, -from_language
    // and -to_language are aliases of -out, -from and -to.
    System.IO.StreamWriter sw = null;
    if (itf.HasString("-out"))
    {
      try
      {
        sw = new System.IO.StreamWriter(itf.GetString("-out"));
      }
      catch (Exception ex)
      {
        OEChem.OEThrow.Fatal("Unable to open " + itf.GetString("-out") +
                             " for writing (" + ex.Message + ")");
      }
    }

    // Write through a local writer rather than redirecting Console.Out,
    // which would be left pointing at a closed stream afterwards.
    System.IO.TextWriter output = Console.Out;
    if (sw != null)
      output = sw;

    uint to_language =
      OEIUPAC.OEGetIUPACLanguage(itf.GetString("-to"));
    uint from_language =
      OEIUPAC.OEGetIUPACLanguage(itf.GetString("-from"));

    uint from_charset =
      OEIUPAC.OEGetIUPACCharSet(itf.GetString("-from_charset"));
    uint to_charset =
      OEIUPAC.OEGetIUPACCharSet(itf.GetString("-to_charset"));

    try
    {
      string name;
      while ((name = ifs.ReadLine()) != null)
      {
        // Convert from charset to internal representation
        if (from_charset == OECharSet.UTF8)
          name = OEIUPAC.OEFromUTF8(name);
        else if (from_charset == OECharSet.HTML)
          name = OEIUPAC.OEFromHTML(name);

        // Translation functions operate on lowercase names
        name = OEIUPAC.OELowerCaseName(name);

        if (from_language != OELanguage.AMERICAN)
          name = OEIUPAC.OEFromLanguage(name, from_language);

        // At this point the name is American English in the
        // LexichemTK default internal character set representation.

        // Convert to output language
        if (to_language != OELanguage.AMERICAN)
          name = OEIUPAC.OEToLanguage(name, to_language);

        // Convert to output charset
        if (to_charset == OECharSet.ASCII)
          name = OEIUPAC.OEToASCII(name);
        else if (to_charset == OECharSet.UTF8)
          name = OEIUPAC.OEToUTF8(name);
        else if (to_charset == OECharSet.HTML)
          name = OEIUPAC.OEToHTML(name);
        else if (to_charset == OECharSet.SJIS)
          name = OEIUPAC.OEToSJIS(name);
        else if (to_charset == OECharSet.EUCJP)
          name = OEIUPAC.OEToEUCJP(name);

        output.WriteLine(name);
      }
    }
    finally
    {
      // Flush and release the files even if a conversion throws; standard
      // output is not owned by this method and is left open.
      if (sw != null)
        sw.Close();
      if (ifs != null)
        ifs.Dispose();
    }
  }

  /// <summary>
  /// Program entry point: parses the command line against
  /// <c>interfaceData</c> and runs the translation.
  /// </summary>
  /// <param name="argv">Command-line arguments.</param>
  public static void Main(string[] argv)
  {
    Translate_example app = new Translate_example();
    OEInterface itf = new OEInterface(interfaceData,
                                      "Translate_example", argv);
    app.Translate(itf);
  }

  // OEInterface definition of the command-line parameters accepted by
  // this program.
  private static string interfaceData = @"
# translate interface file
!CATEGORY translate

      !PARAMETER -in 1
        !ALIAS -i
        !TYPE string
        !REQUIRED true
        !BRIEF Input filename
        !KEYLESS 1
      !END

      !PARAMETER -out 2
        !ALIAS -o
        !TYPE string
        !BRIEF Output filename
        !KEYLESS 2
      !END

      !PARAMETER -from 3
         !ALIAS -from_language
         !TYPE string
         !DEFAULT american
         !LEGAL_VALUE american
         !LEGAL_VALUE english
         !LEGAL_VALUE us

         !LEGAL_VALUE chinese
         !LEGAL_VALUE zh
         !LEGAL_VALUE cn

         !LEGAL_VALUE danish
         !LEGAL_VALUE dk
         !LEGAL_VALUE da

         !LEGAL_VALUE dutch
         !LEGAL_VALUE nl

         !LEGAL_VALUE french
         !LEGAL_VALUE fr

         !LEGAL_VALUE german
         !LEGAL_VALUE de

         !LEGAL_VALUE greek
         !LEGAL_VALUE el

         !LEGAL_VALUE hungarian
         !LEGAL_VALUE hu

         !LEGAL_VALUE irish
         !LEGAL_VALUE ie
         !LEGAL_VALUE ga

         !LEGAL_VALUE italian
         !LEGAL_VALUE it

         !LEGAL_VALUE japanese
         !LEGAL_VALUE jp
         !LEGAL_VALUE ja

         !LEGAL_VALUE polish
         !LEGAL_VALUE pl

         !LEGAL_VALUE portuguese
         !LEGAL_VALUE pt

         !LEGAL_VALUE romanian
         !LEGAL_VALUE ro

         !LEGAL_VALUE russian
         !LEGAL_VALUE ru

         !LEGAL_VALUE slovak
         !LEGAL_VALUE sk

         !LEGAL_VALUE spanish
         !LEGAL_VALUE es

         !LEGAL_VALUE swedish
         !LEGAL_VALUE se
         !LEGAL_VALUE sv

         !LEGAL_VALUE welsh
         !LEGAL_VALUE cy

         !REQUIRED false
         !BRIEF Language for input names.
     !END

     !PARAMETER -to 4
         !ALIAS -to_language
         !TYPE string
         !DEFAULT american
         !LEGAL_VALUE american
         !LEGAL_VALUE english
         !LEGAL_VALUE us

         !LEGAL_VALUE chinese
         !LEGAL_VALUE zh
         !LEGAL_VALUE cn

         !LEGAL_VALUE danish
         !LEGAL_VALUE dk
         !LEGAL_VALUE da

         !LEGAL_VALUE dutch
         !LEGAL_VALUE nl

         !LEGAL_VALUE french
         !LEGAL_VALUE fr

         !LEGAL_VALUE german
         !LEGAL_VALUE de

         !LEGAL_VALUE greek
         !LEGAL_VALUE el

         !LEGAL_VALUE hungarian
         !LEGAL_VALUE hu

         !LEGAL_VALUE irish
         !LEGAL_VALUE ie
         !LEGAL_VALUE ga

         !LEGAL_VALUE italian
         !LEGAL_VALUE it

         !LEGAL_VALUE japanese
         !LEGAL_VALUE jp
         !LEGAL_VALUE ja

         !LEGAL_VALUE polish
         !LEGAL_VALUE pl

         !LEGAL_VALUE portuguese
         !LEGAL_VALUE pt

         !LEGAL_VALUE romanian
         !LEGAL_VALUE ro

         !LEGAL_VALUE russian
         !LEGAL_VALUE ru

         !LEGAL_VALUE slovak
         !LEGAL_VALUE sk

         !LEGAL_VALUE spanish
         !LEGAL_VALUE es

         !LEGAL_VALUE swedish
         !LEGAL_VALUE se
         !LEGAL_VALUE sv

         !LEGAL_VALUE welsh
         !LEGAL_VALUE cy

         !REQUIRED false
         !BRIEF Language for output names.
     !END

     !PARAMETER -from_charset 5
         !TYPE string
         !DEFAULT default
         !REQUIRED false
         !LEGAL_VALUE default
         !LEGAL_VALUE ascii
         !LEGAL_VALUE utf8
         !LEGAL_VALUE html
         !LEGAL_VALUE sjis
         !LEGAL_VALUE eucjp
         !BRIEF Choose charset/encoding for input names.
     !END

     !PARAMETER -to_charset 6
         !ALIAS -encoding
         !ALIAS -charset
         !TYPE string
         !DEFAULT default
         !REQUIRED false
         !LEGAL_VALUE default
         !LEGAL_VALUE ascii
         !LEGAL_VALUE utf8
         !LEGAL_VALUE html
         !LEGAL_VALUE sjis
         !LEGAL_VALUE eucjp
         !BRIEF Choose charset/encoding for output names.
     !END

     !PARAMETER -debug
         !TYPE bool
         !DEFAULT false
         !VISIBILITY hidden
     !END

!END
";

}