Converting Molecules to Names

Converts a file of chemical structures (specified by the -in option) into chemical names (-out option), with a choice of language (-language option), character encoding (-encoding option) and naming style (-style option).

Listing 1: Converting molecules to names

#!/usr/bin/env python
#  Copyright (C) 2009-2014 OpenEye Scientific Software, Inc.
from __future__ import print_function
import sys
from openeye.oechem import *
from openeye.oeiupac import *

def Mol2Nam(itf):
    """Name every molecule in the "-in" file and write or print the names.

    Each molecule read from the input stream is named with
    OECreateIUPACName in the requested "-style", then optionally
    translated ("-language"), capitalized ("-capitalize") and re-encoded
    ("-encoding").

    When "-out" is given, the name becomes the molecule's title (prefixed
    with the previous title and the "-delim" string if "-delim" is set,
    and also stored as SD data under "-tag" if "-tag" is set) and the
    molecule is written to that file.  Otherwise the name is printed to
    standard output, and "-delim"/"-tag" are ignored.

    itf: the parsed command-line interface object the options are read from.
    """
    inname = itf.GetString("-in")
    ifs = oemolistream()
    if not ifs.open(inname):
        OEThrow.Fatal("Unable to open '%s' for reading" % inname)

    ofs = oemolostream()
    outname = None
    if itf.HasString("-out"):
        outname = itf.GetString("-out")
        if not ofs.open(outname):
            # This stream is opened for output, so report a write failure.
            OEThrow.Fatal("Unable to open '%s' for writing" % outname)

    language = OEGetIUPACLanguage(itf.GetString("-language"))
    charset = OEGetIUPACCharSet(itf.GetString("-encoding"))
    style = OEGetIUPACNamStyle(itf.GetString("-style"))

    # The options below never change while iterating, so read them once.
    capitalize = itf.GetBool("-capitalize")
    delim = itf.GetString("-delim") if itf.HasString("-delim") else None
    tag = itf.GetString("-tag") if itf.HasString("-tag") else None

    for mol in ifs.GetOEGraphMols():
        name = OECreateIUPACName(mol, style)

        # NOTE(review): presumably language 0 is the default American
        # English, which needs no translation -- confirm.
        if language > 0:
            name = OEToLanguage(name, language)
        if capitalize:
            name = OECapitalizeName(name)

        # Any other charset value leaves the name in its current form.
        if charset == OECharSet_ASCII:
            name = OEToAscii(name)
        elif charset == OECharSet_UTF8:
            name = OEToUTF8(name)
        elif charset == OECharSet_HTML:
            name = OEToHTML(name)
        elif charset == OECharSet_SJIS:
            name = OEToSJIS(name)
        elif charset == OECharSet_EUCJP:
            name = OEToEUCJP(name)

        if outname:
            if delim is not None:
                # Keep the original title in front of the generated name.
                name = mol.GetTitle() + delim + name

            if tag is not None:
                OESetSDData(mol, tag, name)

            mol.SetTitle(name)
            OEWriteMolecule(ofs, mol)
        else:
            print(name)
            
############################################################
# Interface definition text handed to OEInterface in main().  It declares
# the command-line parameters that Mol2Nam reads back through the
# interface accessors (GetString / HasString / GetBool).
# Note: Mol2Nam looks the character set up as "-encoding", which is
# declared below as an alias of the -charset parameter.
InterfaceData="""
# mol2nam interface file
!CATEGORY mol2nam

    !CATEGORY I/O
        !PARAMETER -in 1
          !ALIAS -i
          !TYPE string
          !REQUIRED true
          !BRIEF Input filename
          !KEYLESS 1
        !END
        
        !PARAMETER -out 2
          !ALIAS -o
          !TYPE string
          !BRIEF Output filename
          !KEYLESS 2
        !END
    !END
    
    !CATEGORY Lexichem Features
              
        !PARAMETER -language 1
           !ALIAS -lang
           !TYPE string
           !DEFAULT american
           !LEGAL_VALUE american
           !LEGAL_VALUE english
           !LEGAL_VALUE us
        
           !LEGAL_VALUE british
           !LEGAL_VALUE uk

           !LEGAL_VALUE chinese
           !LEGAL_VALUE zh
           !LEGAL_VALUE cn

           !LEGAL_VALUE danish
           !LEGAL_VALUE dk
           !LEGAL_VALUE da

           !LEGAL_VALUE dutch
           !LEGAL_VALUE nl

           !LEGAL_VALUE french
           !LEGAL_VALUE fr

           !LEGAL_VALUE german
           !LEGAL_VALUE de
        
           !LEGAL_VALUE greek
           !LEGAL_VALUE el
        
           !LEGAL_VALUE hungarian
           !LEGAL_VALUE hu

           !LEGAL_VALUE irish
           !LEGAL_VALUE ie
           !LEGAL_VALUE ga

           !LEGAL_VALUE italian
           !LEGAL_VALUE it

           !LEGAL_VALUE japanese
           !LEGAL_VALUE jp
           !LEGAL_VALUE ja

           !LEGAL_VALUE polish
           !LEGAL_VALUE pl

           !LEGAL_VALUE portuguese
           !LEGAL_VALUE pt

           !LEGAL_VALUE romanian
           !LEGAL_VALUE ro

           !LEGAL_VALUE russian
           !LEGAL_VALUE ru

           !LEGAL_VALUE slovak
           !LEGAL_VALUE sk

           !LEGAL_VALUE spanish
           !LEGAL_VALUE es

           !LEGAL_VALUE swedish
           !LEGAL_VALUE se
           !LEGAL_VALUE sv

           !LEGAL_VALUE welsh
           !LEGAL_VALUE cy
           
           !REQUIRED false
           !BRIEF Language for output names.
        !END     
        
        !PARAMETER -style 2
            !ALIAS -namestyle 
            !TYPE string
            !DEFAULT openeye
            !LEGAL_VALUE openeye
            !LEGAL_VALUE iupac
            !LEGAL_VALUE cas
            !LEGAL_VALUE traditional
            !LEGAL_VALUE systematic
            !LEGAL_VALUE casindex 
            !LEGAL_VALUE casidx
            !LEGAL_VALUE autonom
            !LEGAL_VALUE iupac79
            !LEGAL_VALUE iupac93
            !LEGAL_VALUE acdname
            !BRIEF Style of output names
        !END            
  
        !PARAMETER -capitalize 3
           !ALIAS -capitalise
           !TYPE bool
           !DEFAULT false
           !BRIEF Capitalize output names.
        !END

        !PARAMETER -tag 4
           !TYPE string
           !REQUIRED false
           !BRIEF Set name as SD data with tag
        !END

        !PARAMETER -delim 5
           !TYPE string
           !REQUIRED false
           !BRIEF Append name to title using 'delim'
        !END

        !PARAMETER -charset 7
            !ALIAS -encoding
            !TYPE string
            !DEFAULT default
            !REQUIRED false
            !LEGAL_VALUE default
            !LEGAL_VALUE ascii
            !LEGAL_VALUE utf8
            !LEGAL_VALUE html
            !LEGAL_VALUE sjis
            !LEGAL_VALUE eucjp
            !LEGAL_VALUE konsole
            !BRIEF Choose charset/encoding for output names.
        !END

    !END

!END
"""

def main(argv=None):
    """Build the command-line interface from InterfaceData and run Mol2Nam.

    argv: argument list in the same form as sys.argv; when omitted,
    ``[__name__]`` is used, i.e. no command-line arguments are supplied.
    """
    # A None sentinel replaces the former mutable list default, which
    # would have been shared between calls.
    if argv is None:
        argv = [__name__]
    itf = OEInterface(InterfaceData, argv)
    Mol2Nam(itf)

if __name__ == "__main__":
    # Script entry point: run with the real command line and hand
    # main()'s result to the interpreter as the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)

Converting Names to Molecules

Converts a file of chemical names (specified by the -in option) written in a specific language (-language option) into a file of chemical structures (specified by the -out option).

Listing 2: Converting names to molecules

#!/usr/bin/env python
#  Copyright (C) 2009 OpenEye Scientific Software, Inc.
import sys
from openeye.oechem import *
from openeye.oeiupac import *

def Nam2Mol(itf):
    """Parse each chemical name read from "-in" into a molecule on "-out".

    Names are read one per line from the "-in" file, or from standard
    input when "-in" is "-".  Each name is speculatively reordered as a
    CAS index name, decoded from HTML or UTF8 if "-charset" asks for it,
    lower-cased, translated from "-language" when that is not American
    English, and finally parsed with OEParseIUPACName.

    A molecule is written for every name that parses; with "-empty" set,
    an empty molecule is written for names that do not.  The written
    molecule's title is the stripped input name, which is also stored as
    SD data under "-tag" when "-tag" is given.

    itf: the parsed command-line interface object the options are read from.
    """
    inname = itf.GetString("-in")
    ifp = sys.stdin
    if inname != "-":
        ifp = open(inname)

    try:
        ofs = oemolostream()
        outname = itf.GetString("-out")
        if not ofs.open(outname):
            OEThrow.Fatal("Unable to open output file: %s" % outname)

        language = OEGetIUPACLanguage(itf.GetString("-language"))
        charset = OEGetIUPACCharSet(itf.GetString("-charset"))

        # These options never change while iterating, so read them once.
        write_empty = itf.GetBool("-empty")
        tag = itf.GetString("-tag") if itf.HasString("-tag") else None

        mol = OEGraphMol()
        for line in ifp:
            name = line.strip()
            mol.Clear()

            # Speculatively reorder CAS permuted index names; an empty
            # result means there is nothing to reorder, so keep the input.
            candidate = OEReorderIndexName(name)
            if not candidate:
                candidate = name

            if charset == OECharSet_HTML:
                candidate = OEFromHTML(candidate)
            elif charset == OECharSet_UTF8:
                candidate = OEFromUTF8(candidate)

            candidate = OELowerCaseName(candidate)

            if language != OELanguage_AMERICAN:
                candidate = OEFromLanguage(candidate, language)

            done = OEParseIUPACName(mol, candidate)

            if not done and write_empty:
                # Discard whatever the failed parse left behind and emit
                # an empty molecule in its place.
                mol.Clear()
                done = True

            if done:
                if tag is not None:
                    OESetSDData(mol, tag, name)

                mol.SetTitle(name)
                OEWriteMolecule(ofs, mol)
    finally:
        # Close the input only if this function opened it.
        if ifp is not sys.stdin:
            ifp.close()

                 
############################################################
# Interface definition text handed to OEInterface in main().  It declares
# the command-line parameters that Nam2Mol reads back through the
# interface accessors (GetString / HasString / GetBool).
# Note: -out defaults to "-", and Nam2Mol treats an -in value of "-" as
# standard input.
InterfaceData="""
# nam2mol interface file
!CATEGORY nam2mol

    !CATEGORY I/O
        !PARAMETER -in 1
          !ALIAS -i
          !TYPE string
          !REQUIRED true
          !BRIEF Input filename
          !KEYLESS 1
        !END
        
        !PARAMETER -out 2
          !ALIAS -o
          !TYPE string
          !DEFAULT -
          !BRIEF Output filename
          !KEYLESS 2
        !END
    !END
    
    !CATEGORY Lexichem Features
              
        !PARAMETER -language 1
           !ALIAS -lang
           !TYPE string
           !DEFAULT american
           !LEGAL_VALUE american
           !LEGAL_VALUE english
           !LEGAL_VALUE us

           !LEGAL_VALUE chinese
           !LEGAL_VALUE zh
           !LEGAL_VALUE cn

           !LEGAL_VALUE danish
           !LEGAL_VALUE dk
           !LEGAL_VALUE da

           !LEGAL_VALUE dutch
           !LEGAL_VALUE nl

           !LEGAL_VALUE french
           !LEGAL_VALUE fr

           !LEGAL_VALUE german
           !LEGAL_VALUE de
        
           !LEGAL_VALUE greek
           !LEGAL_VALUE el
        
           !LEGAL_VALUE hungarian
           !LEGAL_VALUE hu

           !LEGAL_VALUE irish
           !LEGAL_VALUE ie
           !LEGAL_VALUE ga

           !LEGAL_VALUE italian
           !LEGAL_VALUE it

           !LEGAL_VALUE japanese
           !LEGAL_VALUE jp
           !LEGAL_VALUE ja

           !LEGAL_VALUE polish
           !LEGAL_VALUE pl

           !LEGAL_VALUE portuguese
           !LEGAL_VALUE pt

           !LEGAL_VALUE romanian
           !LEGAL_VALUE ro

           !LEGAL_VALUE russian
           !LEGAL_VALUE ru

           !LEGAL_VALUE slovak
           !LEGAL_VALUE sk

           !LEGAL_VALUE spanish
           !LEGAL_VALUE es

           !LEGAL_VALUE swedish
           !LEGAL_VALUE se
           !LEGAL_VALUE sv

           !LEGAL_VALUE welsh
           !LEGAL_VALUE cy
           
           !REQUIRED false
           !BRIEF Language for input names.
        !END     

        !PARAMETER -tag 3
           !TYPE string
           !REQUIRED false
           !BRIEF Set name as SD data with tag
        !END

        !PARAMETER -empty 4
           !TYPE bool
           !DEFAULT false
           !BRIEF Output an empty molecule for unparseable names
        !END
        !PARAMETER -charset 5
           !ALIAS -encoding
           !TYPE string
           !DEFAULT default
           !REQUIRED false
           !LEGAL_VALUE default
           !LEGAL_VALUE ascii
           !LEGAL_VALUE utf8
           !LEGAL_VALUE html
           !BRIEF Choose charset/encoding for input names.
        !END
    !END
!END
"""

def main(argv=None):
    """Build the command-line interface from InterfaceData and run Nam2Mol.

    argv: argument list in the same form as sys.argv; when omitted,
    ``[__name__]`` is used, i.e. no command-line arguments are supplied.
    """
    # A None sentinel replaces the former mutable list default, which
    # would have been shared between calls.
    if argv is None:
        argv = [__name__]
    itf = OEInterface(InterfaceData, argv)
    Nam2Mol(itf)

if __name__ == "__main__":
    # Script entry point: run with the real command line and hand
    # main()'s result to the interpreter as the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)

Translating Names Between Languages

Translates a file of chemical names (specified by the -in option) in a specific language (-from option) into a file of names (specified by the -out option) in another language (-to option).

Listing 3: Translate names between languages

#!/usr/bin/env python
#  Copyright (C) 2014-2015 OpenEye Scientific Software, Inc.
#
# Translates between languages.  Internally LexichemTK uses American
# English so it will convert to/from that as an intermediate
# representation.
# 
# By default the program inputs/outputs the internal LexichemTK
# character set representation.  Optionally one can convert the
# input or output to alternate encodings, e.g. HTML or UTF8.
#
import sys
from openeye.oechem import *
from openeye.oeiupac import *

def Translate(itf):
    """Translate chemical names, one per line, from one language to another.

    Names are read from the "-in" file (standard input when "-in" is "-")
    and written, one per line, to the "-out" file (standard output when
    "-out" is "-" or not set).  Each name is decoded from "-from_charset"
    (only HTML and UTF8 are decoded; any other value is passed through
    unchanged), lower-cased, converted from the "-from" language to
    American English, converted on to the "-to" language, and finally
    encoded in "-to_charset".

    itf: the parsed command-line interface object the options are read from.
    """
    inname = itf.GetString("-in")
    ifp = sys.stdin
    if inname != "-":
        ifp = open(inname)

    # Default to standard output so that ofs is always bound, even when
    # "-out" is not set.
    ofs = sys.stdout
    try:
        if itf.HasString("-out"):
            outname = itf.GetString("-out")
            if outname != "-":
                try:
                    ofs = open(outname, 'w')
                except IOError:
                    # The file is opened for output, so report a write
                    # failure; only I/O errors are handled here.
                    OEThrow.Fatal("Unable to open '%s' for writing" % outname)

        from_language = OEGetIUPACLanguage(itf.GetString("-from"))
        to_language = OEGetIUPACLanguage(itf.GetString("-to"))

        from_charset = OEGetIUPACCharSet(itf.GetString("-from_charset"))
        to_charset = OEGetIUPACCharSet(itf.GetString("-to_charset"))

        for line in ifp:
            name = line.strip()

            # Convert from the input charset to the internal representation
            if from_charset == OECharSet_HTML:
                name = OEFromHTML(name)
            elif from_charset == OECharSet_UTF8:
                name = OEFromUTF8(name)

            # Translation functions all operate on lowercase names
            name = OELowerCaseName(name)

            if from_language != OELanguage_AMERICAN:
                name = OEFromLanguage(name, from_language)

            # At this point the name is American English in the
            # LexichemTK default internal character representation

            # Convert to output language
            if to_language != OELanguage_AMERICAN:
                name = OEToLanguage(name, to_language)

            # Convert to output charset
            if to_charset == OECharSet_ASCII:
                name = OEToAscii(name)
            elif to_charset == OECharSet_UTF8:
                name = OEToUTF8(name)
            elif to_charset == OECharSet_HTML:
                name = OEToHTML(name)
            elif to_charset == OECharSet_SJIS:
                name = OEToSJIS(name)
            elif to_charset == OECharSet_EUCJP:
                name = OEToEUCJP(name)

            ofs.write(name + '\n')
    finally:
        # Close only the files this function opened itself.
        if ifp is not sys.stdin:
            ifp.close()
        if ofs is not sys.stdout:
            ofs.close()

############################################################
# Interface definition text handed to OEInterface in main().  It declares
# the command-line parameters that Translate reads back through the
# interface accessors (GetString / HasString).
# NOTE(review): -from_charset accepts sjis and eucjp, but Translate only
# decodes html and utf8 input; other values are passed through unchanged.
InterfaceData="""
# translate interface file
!CATEGORY translate

      !PARAMETER -in 1
        !ALIAS -i
        !TYPE string
        !REQUIRED true
        !BRIEF Input filename
        !KEYLESS 1
      !END

      !PARAMETER -out 2
        !ALIAS -o
        !TYPE string
        !DEFAULT -
        !BRIEF Output filename
        !KEYLESS 2
      !END

      !PARAMETER -from 3
         !ALIAS -from_language
         !TYPE string
         !DEFAULT american
         !LEGAL_VALUE american
         !LEGAL_VALUE english
         !LEGAL_VALUE us

         !LEGAL_VALUE chinese
         !LEGAL_VALUE zh
         !LEGAL_VALUE cn

         !LEGAL_VALUE danish
         !LEGAL_VALUE dk
         !LEGAL_VALUE da

         !LEGAL_VALUE dutch
         !LEGAL_VALUE nl

         !LEGAL_VALUE french
         !LEGAL_VALUE fr

         !LEGAL_VALUE german
         !LEGAL_VALUE de

         !LEGAL_VALUE greek
         !LEGAL_VALUE el

         !LEGAL_VALUE hungarian
         !LEGAL_VALUE hu

         !LEGAL_VALUE irish
         !LEGAL_VALUE ie
         !LEGAL_VALUE ga

         !LEGAL_VALUE italian
         !LEGAL_VALUE it

         !LEGAL_VALUE japanese
         !LEGAL_VALUE jp
         !LEGAL_VALUE ja

         !LEGAL_VALUE polish
         !LEGAL_VALUE pl

         !LEGAL_VALUE portuguese
         !LEGAL_VALUE pt

         !LEGAL_VALUE romanian
         !LEGAL_VALUE ro

         !LEGAL_VALUE russian
         !LEGAL_VALUE ru

         !LEGAL_VALUE slovak
         !LEGAL_VALUE sk

         !LEGAL_VALUE spanish
         !LEGAL_VALUE es

         !LEGAL_VALUE swedish
         !LEGAL_VALUE se
         !LEGAL_VALUE sv

         !LEGAL_VALUE welsh
         !LEGAL_VALUE cy

         !REQUIRED false
         !BRIEF Language for input names.
     !END

     !PARAMETER -to 4
         !ALIAS -to_language
         !TYPE string
         !DEFAULT american
         !LEGAL_VALUE american
         !LEGAL_VALUE english
         !LEGAL_VALUE us

         !LEGAL_VALUE chinese
         !LEGAL_VALUE zh
         !LEGAL_VALUE cn

         !LEGAL_VALUE danish
         !LEGAL_VALUE dk
         !LEGAL_VALUE da

         !LEGAL_VALUE dutch
         !LEGAL_VALUE nl

         !LEGAL_VALUE french
         !LEGAL_VALUE fr

         !LEGAL_VALUE german
         !LEGAL_VALUE de

         !LEGAL_VALUE greek
         !LEGAL_VALUE el

         !LEGAL_VALUE hungarian
         !LEGAL_VALUE hu

         !LEGAL_VALUE irish
         !LEGAL_VALUE ie
         !LEGAL_VALUE ga

         !LEGAL_VALUE italian
         !LEGAL_VALUE it

         !LEGAL_VALUE japanese
         !LEGAL_VALUE jp
         !LEGAL_VALUE ja

         !LEGAL_VALUE polish
         !LEGAL_VALUE pl

         !LEGAL_VALUE portuguese
         !LEGAL_VALUE pt

         !LEGAL_VALUE romanian
         !LEGAL_VALUE ro

         !LEGAL_VALUE russian
         !LEGAL_VALUE ru

         !LEGAL_VALUE slovak
         !LEGAL_VALUE sk

         !LEGAL_VALUE spanish
         !LEGAL_VALUE es

         !LEGAL_VALUE swedish
         !LEGAL_VALUE se
         !LEGAL_VALUE sv

         !LEGAL_VALUE welsh
         !LEGAL_VALUE cy

         !REQUIRED false
         !BRIEF Language for output names.
     !END

     !PARAMETER -from_charset 5
         !TYPE string
         !DEFAULT default
         !REQUIRED false
         !LEGAL_VALUE default
         !LEGAL_VALUE ascii
         !LEGAL_VALUE utf8
         !LEGAL_VALUE html
         !LEGAL_VALUE sjis
         !LEGAL_VALUE eucjp
         !BRIEF Choose charset/encoding for input names.
     !END

     !PARAMETER -to_charset 6
         !ALIAS -encoding
         !ALIAS -charset
         !TYPE string
         !DEFAULT default
         !REQUIRED false
         !LEGAL_VALUE default
         !LEGAL_VALUE ascii
         !LEGAL_VALUE utf8
         !LEGAL_VALUE html
         !LEGAL_VALUE sjis
         !LEGAL_VALUE eucjp
         !BRIEF Choose charset/encoding for output names.
     !END


!END
"""

def main(argv=None):
    """Build the command-line interface from InterfaceData and run Translate.

    argv: argument list in the same form as sys.argv; when omitted,
    ``[__name__]`` is used, i.e. no command-line arguments are supplied.
    """
    # A None sentinel replaces the former mutable list default, which
    # would have been shared between calls.
    if argv is None:
        argv = [__name__]
    itf = OEInterface(InterfaceData, argv)
    Translate(itf)

if __name__ == "__main__":
    # Script entry point: run with the real command line and hand
    # main()'s result to the interpreter as the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)