#!/usr/bin/env python3

# name:   check_inchikey.py
# author: nbehrnd@yahoo.com
# date:   2021-09-03 (YYYY-MM-DD)
# edit:
"""Compare InChiKeys by DataWarrior with those by OpenBabel.

Written for the CLI of Python 3.9.2 in Linux Debian 11/bullseye, this
script depends on a working installation of OpenBabel including its
Python bindings.  On occasion, DataWarrior and OpenBabel disagree with
each other about the InChIKey to assign to a structure; the SMILES of
these peculiar DataWarrior entries are stored in a permanent record, a
new file named after the input file with "_dissenter.smi" appended.

The mandatory input file is the .txt file written by DataWarrior's
save -> save special.  The script skips the first line of this file as
header and expects each further line to contain three columns, separated
by white space, in this order:
+ DataWarrior's structure ID string (first column)
+ DataWarrior's computed SMILES string of the former (second column)
+ DataWarrior's computed InChiKey about the structure (third column)

To use from the CLI by

python3 check_inchikey.py Random_Molecules.txt

with "Random_Molecules.txt", DataWarrior's export file.
"""

import argparse

from openbabel import openbabel
from openbabel import pybel


def get_arguments():
    """Read the command line and return the name of DataWarrior's file.

    Returns:
        The string given as the single positional argument ``dwfile``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "\n    Comparison of InChiKeys by DataWarrior with those by "
            "OpenBabel."
        )
    )
    cli.add_argument("dwfile", help="DataWarrior's .txt file.")

    return cli.parse_args().dwfile


def openbabel_inchikey(input_smiles=""):
    """Let OpenBabel convert a SMILES string into an InChIKey.

    Args:
        input_smiles: the SMILES string handed to ``pybel.readstring``.

    Returns:
        OpenBabel's "inchikey" output for the molecule, as a string with
        leading and trailing white space removed.
    """
    molecule = pybel.readstring("smi", input_smiles)
    return str(molecule.write("inchikey")).strip()


def inchikey_comparison(input_file=""):
    """Compare InChIKeys by DataWarrior with those by OpenBabel.

    The first line of ``input_file`` is skipped as header.  Every further
    line is split on white space and its first three fields are read as
    DataWarrior's ID, SMILES, and InChIKey.  The SMILES is passed to
    ``openbabel_inchikey`` and the result is compared with DataWarrior's
    InChIKey.  Entries in agreement are reported with a leading ".",
    dissenting entries with a leading "!".  The SMILES of the dissenting
    entries are written, one per line, into ``<input_file>_dissenter.smi``.

    Args:
        input_file: name of the text file exported by DataWarrior.

    Returns:
        None.  If ``input_file`` cannot be read, only a hint is printed;
        neither the summary nor the record file is produced.
    """
    # Only reading the file is guarded, so that an OSError raised later on
    # is not mistaken for a missing input file.
    try:
        with open(input_file, mode="r") as source:
            content = source.readlines()
    except OSError:
        print("Check if file '{}' is present.".format(input_file))
        return

    checked = 0
    dissens_list = []

    for line in content[1:]:
        fields = line.split()
        if len(fields) < 3:
            # Blank lines (e.g., at the end of the file) are passed over
            # silently; incomplete records are reported, then skipped.
            if fields:
                print(f"? skipped incomplete line: {line.strip()}")
            continue

        dw_id, dw_smiles, dw_inchikey = fields[:3]
        checked += 1

        try:
            control = openbabel_inchikey(input_smiles=dw_smiles)
        except OSError:
            # NOTE(review): per pybel's documentation, readstring raises
            # IOError (an alias of OSError) for a string it cannot parse.
            # Such an entry is kept as "to recheck" instead of ending the
            # whole run.
            control = "(no InChIKey by OpenBabel)"

        if control == dw_inchikey:
            print(f". {dw_id}")
        else:
            print(f"! {dw_id} {dw_inchikey} {control}")
            dissens_list.append(dw_smiles)

    record = "_".join([input_file, "dissenter.smi"])

    print("\n ----")
    print("data (total)    : {}".format(checked))
    print("data to recheck : {}".format(len(dissens_list)))
    # Name the file actually written, which carries the input file's name.
    print(f"See file '{record}' for data to recheck.")

    with open(record, mode="w") as newfile:
        newfile.writelines(f"{entry}\n" for entry in dissens_list)


def main():
    """Read the file name from the CLI and run the InChIKey comparison."""
    inchikey_comparison(input_file=get_arguments())


# Start the comparison only if this file is run as a script; a mere import
# of the module does not call main().
if __name__ == "__main__":
    main()
