Exporting SD data to a CSV file
A program that converts the tags on an SD file to CSV format. The output has on the first line the molecule title, followed by all the unique tags found in the input database.
Example
prompt > sdf2csv.py input.sdf output.csv
Code
Download code
#!/usr/bin/env python
# (C) 2022 Cadence Design Systems, Inc. (Cadence)
# All rights reserved.
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of Cadence products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. Cadence claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable Cadence offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall Cadence be
# liable for any damages or liability in connection with the Sample Code
# or its use.
#############################################################################
# Extract properties from SD file and save as CSV
#############################################################################
import sys
from openeye import oechem
def SDF2CSV(ifs, csv):
taglist = []
# read through once to find all unique tags
for mol in ifs.GetOEGraphMols():
for dp in oechem.OEGetSDDataPairs(mol):
if dp.GetTag() not in taglist:
taglist.append(dp.GetTag())
ifs.rewind()
# print out column labels
header = "Title"
for tag in taglist:
header += ",%s" % tag
header += '\n'
csv.write(header)
# build csv file
for mol in ifs.GetOEGraphMols():
line = [mol.GetTitle()]
for tag in taglist:
if oechem.OEHasSDData(mol, tag):
value = oechem.OEGetSDData(mol, tag)
else:
value = ''
line.append(',')
line.append(value)
csv.write(''.join(line))
csv.write('\n')
def main(argv=[__name__]):
if len(argv) != 3:
oechem.OEThrow.Usage("%s <infile> <csvfile>" % argv[0])
ifs = oechem.oemolistream()
if not ifs.open(argv[1]):
oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])
if ifs.GetFormat() not in [oechem.OEFormat_SDF, oechem.OEFormat_OEB]:
oechem.OEThrow.Fatal("Only works for sdf or oeb input files")
csv = oechem.oeofstream()
if not csv.open(argv[2]):
oechem.OEThrow.Fatal("Unable to open %s for writing" % argv[2])
SDF2CSV(ifs, csv)
if __name__ == '__main__':
sys.exit(main(sys.argv))
See also
OEGetSDDatafunctionOEGetSDDataPairsfunction