One-Hot Encoding SMILES
Last updated
from global_chem import GlobalChem
from global_chem_extensions import GlobalChemExtensions
gc = GlobalChem()
cheminformatics = GlobalChemExtensions().cheminformatics()
smiles_list = list(gc.get_node_smiles('pihkal').values())
encoded_smiles = cheminformatics.encode_smiles(smiles_list, max_length=200)
print ('Encoded SMILES: %s' % encoded_smiles[0])
decoded_smiles = cheminformatics.decode_smiles(encoded_smiles)
print ('Decoded SMILES: %s' % decoded_smiles[0])

Encoded SMILES: [[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
Decoded SMILES: CCC(N)CC1=CC(=C(OC)C(=C1)OC)OC

__SMILES_MAPPING__ = [
' ',
'#', '%', '(', ')', '+', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'=', '@',
'A', 'B', 'C', 'F', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P',
'R', 'S', 'T', 'V', 'X', 'Z',
'[', '\\', ']',
'a', 'b', 'c', 'e', 'g', 'i', 'l', 'n', 'o', 'p', 'r', 's',
't', 'u',
'&', ':', '*'
]