# nlp_using_smiles_&_properties.py

# -*- coding: utf-8 -*-
"""NLP using SMILES & Properties.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1DcQstFDu7avXT6MPt-ihmADKuMCqhLrJ
"""

#importing necessary libraries
import pubchempy as pcp
import nltk
from nltk.tokenize import word_tokenize

#function to extract compounds from PubChem
def extract_compounds(smiles, properties):
    """Look up PubChem compounds for SMILES strings and property tokens.

    Args:
        smiles: One or more SMILES strings separated by whitespace.
        properties: Free-text property expression. It is split with
            ``word_tokenize`` and every resulting token is looked up
            on its own.

    Returns:
        A list with one entry per successful lookup, SMILES lookups first,
        then property-token lookups. Each entry is whatever
        ``pcp.get_compounds`` returned for that token.

    Lookups that raise are logged and skipped, so one bad token does not
    abort the remaining ones.
    """
    import logging

    log = logging.getLogger(__name__)

    # SMILES is a chemical line notation, not prose: word_tokenize is built
    # for natural-language text and would break a SMILES string apart at
    # punctuation such as brackets and parentheses, so the fragments (not the
    # molecule) would be looked up. Split on whitespace only.
    smiles_tokens = smiles.split()
    properties_tokens = word_tokenize(properties)

    # NOTE(review): 'property' does not look like a lookup namespace that
    # PubChem accepts for compounds, so these lookups may always fail and be
    # skipped -- confirm the intended namespace with the author.
    lookups = [(token, 'smiles') for token in smiles_tokens]
    lookups += [(token, 'property') for token in properties_tokens]

    compounds = []
    for token, namespace in lookups:
        try:
            found = pcp.get_compounds(token, namespace)
        except Exception as exc:
            # Best-effort by design, but unlike a bare ``except:`` this lets
            # KeyboardInterrupt/SystemExit through and leaves a trace.
            log.warning(
                "PubChem lookup failed for %r (namespace %r): %s",
                token, namespace, exc,
            )
            continue
        compounds.append(found)

    return compounds

# Example usage: query PubChem with one SMILES string and one property
# expression, then print everything that came back.
smiles, properties = 'C1CN2CC3=CC=CC=C3N=C2[C@H]1O', 'molecular_weight>180'
compounds = extract_compounds(smiles=smiles, properties=properties)
print(compounds)

import requests

def get_compound_name(smiles, timeout=30):
    """Return the IUPAC name PubChem reports for a SMILES string.

    Args:
        smiles: SMILES string identifying the compound.
        timeout: Seconds to wait for PubChem before giving up.

    Returns:
        The ``IUPACName`` of the first property record when PubChem answers
        with HTTP 200; otherwise the string
        ``"Error: Unable to fetch data from PubChem"``.

    Raises:
        requests.RequestException: On connection failure or timeout.
        KeyError, IndexError: If a 200 response lacks the expected fields.
    """
    # PubChem PUG REST endpoint; the SMILES itself is sent as a URL argument.
    url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/property/IUPACName,MolecularWeight,CanonicalSMILES,InChIKey/JSON"

    # SMILES routinely contains characters that are not safe inside a URL
    # path ('#', '/', '\\', '+', '[', '@'). Passing it via ``params`` lets
    # requests percent-encode it instead of corrupting the request.
    # The timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, params={"smiles": smiles}, timeout=timeout)

    # Anything but 200 is reported through the error string.
    if response.status_code != 200:
        return "Error: Unable to fetch data from PubChem"

    # Extract the name of the compound from the response
    data = response.json()
    return data['PropertyTable']['Properties'][0]['IUPACName']

# Example usage: resolve one SMILES string to its name and print the result
# (the printed value is the error string if the request did not succeed).
smiles = "CC(=O)Oc1ccccc1C(=O)O"
compound_name = get_compound_name(smiles=smiles)
print("Compound name:", compound_name)

# Environment setup (originally a Colab notebook cell).
#
# "!pip install ..." is IPython shell syntax and a SyntaxError in a plain
# .py file, so the install step is kept as an instruction instead. Run it in
# a shell before executing this script:
#
#     pip install pubchempy

import nltk

# Tokenizer data required by word_tokenize. Recent NLTK releases load it
# from 'punkt_tab', older ones from 'punkt'; fetch both so either works.
nltk.download('punkt')
nltk.download('punkt_tab')