OEApplyChEMBL24SolubilityTransforms¶
OESystem::OEIterBase<OEChem::OEMolBase> *
OEApplyChEMBL24SolubilityTransforms(OEChem::OEMolBase &input, int context, unsigned int minMMPThreshold=5)
Given an input molecule, apply transformations derived from solubility data obtained from the
[ChEMBL24-2018] database. The context argument controls the amount of chemistry information
that is included in each transformation reaction; see OEMatchedPairContext.
This function supports only the OEMatchedPairContext::Bond0 or OEMatchedPairContext::Bond2
context values. Only transformations whose number of matched pairs meets or exceeds
minMMPThreshold are applied. Use a minMMPThreshold value of 0 to apply all transformations
regardless of the number of matched pairs associated with them.
Note
This function does not perform any validation or filtering on the input molecule to be transformed. The caller is expected to perform validity and/or size checking to ensure sensible inputs are provided.
In the examples below, the input structures are transformed by the ChEMBL solubility transforms and exported
to a file format that supports SD data. Each transformed structure will contain information about the
solubility transform (as SMIRKS) that generated it, and the matched pair information associated with each
transform (ChEMBL identifiers and solubility data). The added annotation data will contain the data fields
OEMMP_normalized_value (uM), OEMMP_published_value, OEMMP_examples (SMILES), and OEMMP_transform (SMILES)
for subsequent analysis.
// Example: apply the ChEMBL24 solubility transforms to every molecule read
// from `ifs` and write the transformed molecules to `ofs`. Where possible,
// each exported molecule is annotated with the average change in solubility
// computed from its matched-pair SD data.
// `ifs`, `ofs`, `average` and `stddev` are defined outside this snippet.

// running total of exported molecules across all inputs
unsigned int totalmols = 0u;
// number of bonds of chemistry context at site of change
// for the applied transforms
unsigned int xformctxt = OEMatchedPairContext::Bond2;
// 1-based index of the current input molecule (used for reporting only)
unsigned int molidx = 0u;
OEGraphMol mol;
while (OEReadMolecule(ifs, mol))
{
  ++molidx;
  // consider only the largest input fragment
  OEDeleteEverythingExceptTheFirstLargestComponent(mol);
  // number of transformed molecules exported for this input molecule
  unsigned int smolcnt = 0;
  // only consider solubility transforms having at least 5 matched pairs
  for (OEIter<OEMolBase> solMol = OEApplyChEMBL24SolubilityTransforms(mol, xformctxt, 5); solMol; ++solMol)
  {
    // compute net change in solubility from MMP data;
    // products without the SD tag are neither annotated nor exported
    if (OEHasSDData(solMol, "OEMMP_normalized_value (uM)"))
    {
      std::string sditem = (OEGetSDData(solMol,"OEMMP_normalized_value (uM)"));
      std::vector<std::string> lines;
      // the SD item holds one matched pair per line; the averaging below
      // is only performed when there is more than one line
      if (OEStringTokenize(lines, sditem, "\n", false) && lines.size() > 1)
      {
        // per-pair solubility change (toValue - fromValue)
        std::vector<float> deltasol;
        for (std::vector<std::string>::iterator line = lines.begin(); line != lines.end(); ++line)
        {
          std::vector<std::string> sdvalues;
          if (OEStringTokenize(sdvalues, *line, ",", false) && sdvalues.size() > 3)
          {
            // fromIndex,toIndex,fromValue,toValue
            if (sdvalues[2].empty() || sdvalues[3].empty())
              continue;  // skip this line: one of the two values is missing
            float fromVal, toVal;
            if (OEStringToNumber(sdvalues[3],toVal) &&
                OEStringToNumber(sdvalues[2],fromVal))
              deltasol.push_back(toVal - fromVal);
          }
        }
        // NOTE(review): if no line parsed successfully, deltasol is empty here;
        // the result then depends on average()/stddev(), which are not shown
        // in this snippet -- confirm they handle an empty vector.
        float avgsol = average(deltasol);
        // reject examples with net decrease in solubility
        // (skips to the next transformed molecule without exporting this one)
        if (avgsol < 0.0f)
          continue;
        float sdev = stddev(deltasol);
        // annotate with average,stddev,num
        // size() returns std::size_t; cast so the argument matches "%d"
        OEAddSDData(solMol,
                    "OEMMP_average_delta_normalized_value",
                    OEMakeString("%.2f,%.2f,%d", avgsol, sdev, static_cast<int>(deltasol.size())));
      }
      // export solubility transformed molecule with SDData annotations
      if (OEWriteMolecule(ofs, solMol) == OEWriteMolReturnCode::Success)
        smolcnt += 1;
    }
  }
  // both counters are unsigned int, hence "%u"
  OEThrow.Info("%u: Exported molecule count, %u", molidx, smolcnt);
  totalmols += smolcnt;
}