In [1]:
"""Compare InChiKeys generated by DW with those by OpenBabel

Prompted by the question in the forum: perhaps there are other
structures that DW struggles to convert into InChIKeys.

This Jupyter notebook was written for and tested on Linux
Debian 12/bookworm (branch testing) with Python 3.9.2,
Jupyter 6.2.0, and OpenBabel 3.1.1 as provided by
Debian's repositories.
"""

import os
from openbabel import openbabel as ob
from openbabel import pybel
In [2]:
# Reference check against OpenBabel
#
# On the command line, OpenBabel 3.1.1 converts the SMILES string of
# pyridine with
#
# obabel -:"c1ccncc1" -oinchikey
#
# into the InChIKey JUJWROOIHBZHMG-UHFFFAOYSA-N; the same conversion
# requested via pybel is printed below for comparison.

input_smiles = "c1ccncc1"
mol = pybel.readstring("smi", input_smiles)
print(mol.write("inchikey"))
JUJWROOIHBZHMG-UHFFFAOYSA-N

In [3]:
# DataWarrior (DW) was asked to generate a library of random
# molecules, which entered the first column of a new worksheet.
# DW then appended the SMILES strings (second column) and the
# InChIKeys (third column) it computed for these molecules.  The
# table was saved as .dwar and, via save -> save special, exported
# as a .txt file; the latter is the input for this notebook.

input_file = "Random_Molecules.txt"

# SMILES of the entries where DW and OpenBabel assign different InChIKeys
dissens_list = list()

def openbabel_inchikey(input_smiles=""):
    """Let OpenBabel (via pybel) translate a SMILES string into an InChIKey.

    Args:
        input_smiles: the SMILES string to convert.

    Returns:
        The text pybel writes for the "inchikey" format, with leading
        and trailing whitespace removed so it can be compared directly
        against another key.
    """
    molecule = pybel.readstring("smi", input_smiles)
    raw_key = molecule.write("inchikey")
    return str(raw_key).strip()


def inchikey_comparison(input_file=""):
    """Compare InChIKeys by DataWarrior with those by OpenBabel.

    The first line of ``input_file`` is treated as a header and skipped.
    Every following record is split on whitespace and is expected to
    provide at least three fields: DataWarrior's identifier, the SMILES
    string, and DataWarrior's InChIKey.  The SMILES string is passed to
    ``openbabel_inchikey`` and the two keys are compared.

    One line is printed per record: ``. <id>`` if both keys agree,
    ``! <id> <DW key> <OpenBabel key>`` if they differ, and
    ``? line <n> skipped ...`` for a non-blank record with fewer than
    three fields.  Blank lines are ignored silently.  A summary follows
    and the SMILES of all disagreeing records are written to
    ``dissenter.smi`` in the current working directory.

    Side effects: the module-level ``dissens_list`` is emptied when the
    function starts and afterwards holds the SMILES of the disagreeing
    records of this call only.

    Args:
        input_file: path to the text table exported by DataWarrior.

    Returns:
        None.
    """
    # Start from a clean slate; otherwise a second call would carry over
    # the dissenters of the previous one into the summary and the file.
    dissens_list.clear()
    count_total = 0

    with open(input_file, mode="r") as source:
        content = source.readlines()

    # line numbers are 1-based and the header occupies line 1
    for line_number, line in enumerate(content[1:], start=2):
        fields = line.split()
        if not fields:
            # blank line, e.g. trailing newline(s) at the end of the export
            continue
        if len(fields) < 3:
            print(f"? line {line_number} skipped, expected 3 columns: {line.strip()}")
            continue

        dw_id, dw_smiles, dw_inchikey = fields[:3]
        count_total += 1

        control = openbabel_inchikey(input_smiles=dw_smiles)

        if control == dw_inchikey:
            print(f". {dw_id}")
        else:
            print(f"! {dw_id} {dw_inchikey} {control}")
            dissens_list.append(dw_smiles)

    # summary; the total counts the records actually compared
    print("\n ----")
    print("data (total)    : {}".format(count_total))
    print("data to recheck : {}".format(len(dissens_list)))
    print("See file 'dissenter.smi' for data to recheck.")

    with open("dissenter.smi", mode="w") as newfile:
        for entry in dissens_list:
            newfile.write(f"{entry}\n")


def main():
    """Run the InChIKey comparison on the module-level ``input_file``."""
    source_table = input_file
    inchikey_comparison(input_file=source_table)

# Run the comparison as soon as this cell is executed.
main()
. fekaJ@LPQKVRuZL@QddabbbbVLrffL]EZhFXJIff@@@
! eo`TIH@LLhDAECDbfn`@cIIBhhhidhXhh\YdkHkgejAhahifh@@@`lP CGVGNRKLNQSRHM-UHFFFAOYSA-N CGVGNRKLNQSRHM-UVTDQMKNSA-N
. fle`p@H`PspTyEMEEddieBJQkMA@@EM@@@
! fg`P@A@QN~QSQISZJJKSQIZIWdnkZ@HjZhFBbhDJQd`uf@ FJJQKWPOZDMDAX-YADARESESA-N FJJQKWPOZDMDAX-KSKJNGAJSA-N
. feg@Q@NV|tDPbHrJYIQIIQPzJZKRTxHytuSPQEPP@DHh@
! fmg@R@F}GLDQdTRTvbbbTvrTTmFV}RuPDQUTsTHXcH@ NSQBENKRBWQXDR-IFXJQAMLSA-N NSQBENKRBWQXDR-LIRRHRJNSA-N
. eg`XN@@F@DACHbnlbfbbfabrtrfTtRTfcgausPQ@PDuUP@@HYIi@@
. e`\XF@@J`DEOGolbfbbbbbTqdRtjVf`aedUSVusP@TARUTDDLB@@@
. eghPJH@JNBmc`HcHheEDmddeBihheDpHYXEDm]MUT@@ELDC@@a`bD@@
. eodPJ@@@D@dblbfbbTRbbbRbTnQVt\bzjfn^ZhHA`bjbb@@DLEY@@

 ----
data (total)    : 10
data to recheck : 3
See file 'dissenter.smi' for data to recheck.
In [ ]: