"""Count occurrences of a substructure pattern in each entry of a SMILES file.

Reads ``smiles_source`` line by line, parses every non-blank line as a SMILES
string, counts the matches of ``functional_group`` in the resulting molecule,
prints a per-entry report and finally the grand total over all entries.
"""
import sys

from rdkit import Chem

# Input file; each non-blank line is handed to the SMILES parser.
smiles_source = "smiles_list.smi"
# Running sum of the matches found across all successfully parsed entries.
grand_total = 0

# example pattern to identify and count:
functional_group = Chem.MolFromSmarts('[CH3]')  # methyl group
# alternative examples:
# functional_group = Chem.MolFromSmarts('c1ccccn1')  # for a pyridine
# functional_group = Chem.MolFromSmarts('C1CCCCC1')  # for cyclohexane

with open(smiles_source, mode="r") as source_file:
    # `index` is the line number in the file, so reports stay traceable to
    # the input even when some lines are skipped.
    for index, line in enumerate(source_file, start=1):
        smiles = line.strip()
        if not smiles:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no structure to search.
            continue

        molecule = Chem.MolFromSmiles(smiles)
        if molecule is None:
            # MolFromSmiles signals a parse failure by returning None rather
            # than raising; report the entry and carry on so one bad line
            # does not abort the whole count.
            print("entry {:2} skipped, SMILES could not be parsed: {}".format(
                index, smiles), file=sys.stderr)
            continue

        matches = molecule.GetSubstructMatches(functional_group)
        print("{:3} matches in entry {:2}: {}.".format(
            len(matches), index, smiles))
        grand_total += len(matches)

print("\nIn total {} instances were identified.".format(grand_total))