#include "Mol2_file.h"

#include "Log.h"

#include "Atom.h"
#include "Atom_kit.h"
#include "Bond.h"
#include "Model.h"
#include "Model_kit.h"
#include "User.h"
#include "Text.h"
#include "Create.h"

#include <fstream>
#include <sstream>
#include <string>
#include <algorithm>
#include <functional>
#include <cctype>

namespace MM
{
// Definition of the static prototype instance of the MOL2 reader.
// NOTE(review): `it_is_a_prototype` presumably selects the
// Mol2_file (Prototype const &) constructor, which registers the
// "mol2" and "ml2" extensions - confirm against the header.
Mol2_file Mol2_file::prototype_(it_is_a_prototype);


Mol2_file::
Mol2_file (Prototype const &)
    : Model_file (Prototype())
{
    // File name extensions handled by this reader.
    add_extension ("mol2");
    add_extension ("ml2");
}

// Builds a reader bound to `model`.
// Both line counters start at -1, and no section reader is selected.
Mol2_file::
Mol2_file (Model & model)
:
    Model_file (model),
    line_counter_(-1),
    section_data_line_counter_(-1)
{
    // Give the section reader a well-defined (null) value so the member
    // never holds an indeterminate pointer-to-member.
    read_data_line_function_ = 0;

    // File name extensions handled by this reader.
    add_extension ("mol2");
    add_extension ("ml2");
}

// Returns a newly allocated Mol2_file bound to `model`.
// The object is created with `new`; the caller receives the raw pointer.
Model_file * Mol2_file::
clone (Model & model) const
{
    // The Model constructor registers the "mol2"/"ml2" extensions itself,
    // so nothing has to be copied from this instance.
    return new Mol2_file (model);
}

// Saving in MOL2 format is not implemented: the call does nothing.
void Mol2_file::
save (bool /*to_comment*/)
{
    // TODO: write the model out as a MOL2 file.
}

// Placeholder: no file name check is performed yet.
void Mol2_file::
check_file_name()
{
    // TODO: implement the check.
}

// Reads lines from `in` until one that is neither blank nor a comment.
//
// A line is blank when it contains only spaces, tabs and carriage returns.
// A line is a comment when its very first character is '#'.
// `line_counter_` is advanced for every physical line read, skipped or not.
//
// Returns true with the line stored in `line`, or false when the stream is
// exhausted (or fails) before such a line is found.
bool Mol2_file::
next_line (std::ifstream & in, std::string & line)
{
    while (std::getline (in, line))
    {
        ++line_counter_;

        // Test for blank first: this also covers the empty string, so
        // line[0] below is only evaluated on a non-empty line.
        if (line.find_first_not_of (" \t\r") == std::string::npos)
            continue;                                   // blank line

        if (line[0] == '#')
            continue;                                   // comment line

        return true;
    }
    return false;
}

// Reads the MOL2 file `file_name` and feeds its content to the readers.
//
// Lines starting with '@' are treated as Record Type Indicators (RTI) and
// select the reader for the following data lines; every other significant
// line is passed to that reader.
//
// Errors are reported through to_user().error(): a file that cannot be
// opened, or a Text exception thrown while parsing (reported together with
// the file name and the current line number). Nothing is rethrown.
void Mol2_file::
add (Text const & file_name)
{
    line_counter_ = 0;

    // Forget any reader selected while reading a previous file: a data line
    // must not be dispatched before this file has announced a record type.
    read_data_line_function_ = 0;

    std::ifstream   file (file_name.c_str());
    std::string     line;

    if (file.fail())
    {
        to_user(). error (Text("Cannot open file " + file_name));
        return;
    }

    try
    {
        while (next_line (file, line))
        {
            if (line[0] == '@') // Is a Record Type Indicator (RTI)
            {
                detect_record_type (line);
            }
            else
            {
                // Calling through a null pointer-to-member is undefined;
                // report the malformed file instead.
                if (read_data_line_function_ == 0)
                    throw Text (line.c_str()) +
                        "\nData line found before any TRIPOS Record Type Indicator.";

                read_data_line (file, line);
            }
        }

        //check_integrity (); //fix
    }
    catch (Text const & error)
    {
        to_user().error (Text() + 
            "File " + file_name + "\n"
            "is broken at line " + line_counter_ + ":\n" +
            error);
    }
}

void Mol2_file::
detect_record_type (std::string & line)
{
    section_data_line_counter_ = 0;

    using namespace std;
    string          type;
    istringstream   rti (line);
    
    rti >> type;
    if (rti.fail())
        throw Text(line.c_str())+"\nThis is not TRIPOS Record Type Indicator.";
    
    //log () << type.c_str() << "\n";
    if      (type == "@<TRIPOS>ALT_TYPE")
        read_data_line_function_ = &Mol2_file::read_ALT_TYPE;
    else if (type == "@<TRIPOS>ANCHOR_ATOM")
        read_data_line_function_ = &Mol2_file::read_ANCHOR_ATOM;
    else if (type == "@<TRIPOS>ASSOCIATED_ANNOTATION")
        read_data_line_function_ = &Mol2_file::read_ASSOCIATED_ANNOTATION;
    else if (type == "@<TRIPOS>ATOM")
        read_data_line_function_ = &Mol2_file::read_ATOM;
    else if (type == "@<TRIPOS>BOND")
        read_data_line_function_ = &Mol2_file::read_BOND;
    else if (type == "@<TRIPOS>CENTER_OF_MASS")
        read_data_line_function_ = &Mol2_file::read_CENTER_OF_MASS;
    else if (type == "@<TRIPOS>CENTROID")
        read_data_line_function_ = &Mol2_file::read_CENTROID;
    else if (type == "@<TRIPOS>COMMENT")
        read_data_line_function_ = &Mol2_file::read_COMMENT;
    else if (type == "@<TRIPOS>CRYSIN")
        read_data_line_function_ = &Mol2_file::read_CRYSIN;
    else if (type == "@<TRIPOS>DICT")
        read_data_line_function_ = &Mol2_file::read_DICT;
    else if (type == "@<TRIPOS>DATA_FILE")
        read_data_line_function_ = &Mol2_file::read_DATA_FILE;
    else if (type == "@<TRIPOS>EXTENSION_POINT")
        read_data_line_function_ = &Mol2_file::read_EXTENSION_POINT;
    else if (type == "@<TRIPOS>FF_PBC")
        read_data_line_function_ = &Mol2_file::read_FF_PBC;
    else if (type == "@<TRIPOS>FFCON_ANGLE")
        read_data_line_function_ = &Mol2_file::read_FFCON_ANGLE;
    else if (type == "@<TRIPOS>FFCON_DIST")
        read_data_line_function_ = &Mol2_file::read_FFCON_DIST;
    else if (type == "@<TRIPOS>FFCON_MULTI")
        read_data_line_function_ = &Mol2_file::read_FFCON_MULTI;
    else if (type == "@<TRIPOS>FFCON_RANGE")
        read_data_line_function_ = &Mol2_file::read_FFCON_RANGE;
    else if (type == "@<TRIPOS>FFCON_TORSION")
        read_data_line_function_ = &Mol2_file::read_FFCON_TORSION;
    else if (type == "@<TRIPOS>LINE")
        read_data_line_function_ = &Mol2_file::read_LINE;
    else if (type == "@<TRIPOS>LSPLANE")
        read_data_line_function_ = &Mol2_file::read_LSPLANE;
    else if (type == "@<TRIPOS>MOLECULE")
        read_data_line_function_ = &Mol2_file::read_MOLECULE;
    else if (type == "@<TRIPOS>NORMAL")
        read_data_line_function_ = &Mol2_file::read_NORMAL;
    else if (type == "@<TRIPOS>QSAR_ALIGN_RULE")
        read_data_line_function_ = &Mol2_file::read_QSAR_ALIGN_RULE;
    else if (type == "@<TRIPOS>RING_CLOSURE")
        read_data_line_function_ = &Mol2_file::read_RING_CLOSURE;
    else if (type == "@<TRIPOS>ROTATABLE_BOND")
        read_data_line_function_ = &Mol2_file::read_ROTATABLE_BOND;
    else if (type == "@<TRIPOS>SEARCH_DIST")
        read_data_line_function_ = &Mol2_file::read_SEARCH_DIST;
    else if (type == "@<TRIPOS>SEARCH_OPTIONS")
        read_data_line_function_ = &Mol2_file::read_SEARCH_OPTIONS;
    else if (type == "@<TRIPOS>SET")
        read_data_line_function_ = &Mol2_file::read_SET;
    else if (type == "@<TRIPOS>SUBSTRUCTURE")
        read_data_line_function_ = &Mol2_file::read_SUBSTRUCTURE;
    else if (type == "@<TRIPOS>U_FEAT")
        read_data_line_function_ = &Mol2_file::read_U_FEAT;
    else if (type == "@<TRIPOS>UNITY_ATOM_ATTR")
        read_data_line_function_ = &Mol2_file::read_UNITY_ATOM_ATTR;
    else if (type == "@<TRIPOS>UNITY_BOND_ATTR")
        read_data_line_function_ = &Mol2_file::read_UNITY_BOND_ATTR;
    else 
        throw Text(line.c_str())+"\nThis is not TRIPOS Record Type Indicator.";
}

// Handles one logical data line of the current section.
//
// A physical line whose last non-blank character is a backslash continues
// on the next significant line (as returned by next_line). The pieces are
// joined in `line`, with each continuation backslash replaced by a space,
// and the result is handed to the reader selected by the last RTI.
// Trailing spaces, tabs and carriage returns are ignored when looking for
// the backslash, so files with CRLF line ends continue correctly too.
//
// `section_data_line_counter_` is incremented once per logical line.
//
// Throws Text when a line is blank or when the file ends while a
// continuation is still pending.
void Mol2_file::
read_data_line (std::ifstream & in, std::string & line)
{
    ++section_data_line_counter_;

    static const char * const trailing_blanks = " \t\r";

    // { Combine back slash separated lines.
    std::string::size_type last = line.find_last_not_of (trailing_blanks);
    if (last == std::string::npos)
        throw Text ("Blank line should not be here.");

    bool attach_next_line = (line [last] == '\\');
    if (attach_next_line)
        line [last] = ' ';

    std::string add_on;
    while (attach_next_line)
    {
        // The promised continuation line must actually exist.
        if (!next_line (in, add_on))
            throw Text ("File ends after a line continuation ('\\').");

        last = add_on.find_last_not_of (trailing_blanks);
        if (last == std::string::npos)
            throw Text ("Blank line should not be here.");

        // Blank out only a continuation backslash; the last character of
        // the final piece is data and must be kept.
        if (add_on [last] == '\\')
            add_on [last] = ' ';
        else
            attach_next_line = false;

        line += add_on;
    }
    // }

    (this->*read_data_line_function_) (line);  
}

// Readers for ALT_TYPE, ANCHOR_ATOM and ASSOCIATED_ANNOTATION records.
// The bodies are empty: data lines of these records are ignored.
void Mol2_file::read_ALT_TYPE (std::string & ) {}
void Mol2_file::read_ANCHOR_ATOM     (std::string & ) {}
void Mol2_file::read_ASSOCIATED_ANNOTATION     (std::string & ) {}

void Mol2_file::
read_ATOM (std::string & line)
{
    using namespace std;
    istringstream   stream (line);

    int             atom_id;
    string          atom_name;
    double          x, y, z;
    string          atom_type;
    int             substructure_id   = 1;
    string          substructure_name ("none");
    double          charge            = 0.;
    string          status_bit;

    stream >> atom_id >> atom_name >> x >> y >> z >> atom_type;
    if (stream.fail())
        throw Text (line.c_str()) + 
            "\nFormat of ATOM data line should be:\n"
            "atom_id atom_name x y z atom_type "
            "[subst_id [subst_name [charge [status_bit]]]]";

    Element         element;

    if (is_not_a_SYBYL (atom_type, element))
        throw Text ("'") + atom_type.c_str() + "'" 
            + " is not a SYBYL atom type.";

    stream >> substructure_id >> substructure_name >> charge >> status_bit;

    //Atom *current_atom = create_atom (element);
    Atom & current_atom = new_atom (element);

    current_atom.kit().set_mm_type (atom_name.c_str()); //fix

    current_atom.kit().set_number (atom_id); 
    current_atom.set_x      (x);
    current_atom.set_y      (y);
    current_atom.set_z      (z);
    current_atom.set_charge (charge);

    //log() << Text(atom_id) << " " << atom_name.c_str()  << " " << element.c_str() << "\n";

    //model(). add_atom (current_atom);
}

void Mol2_file::
read_BOND (std::string & line)
{
    using namespace std;
    istringstream   stream (line);

    int bond_id , origin_atom_id, target_atom_id;
    string bond_type;

    stream >> bond_id >> origin_atom_id >> target_atom_id >> bond_type;
    if (stream.fail())
        throw Text (line.c_str()) + 
            "\nFormat of BOND data line should be:\n"
            "bond_id origin_atom_id target_atom_id bond_type [status_bits]";

    //transform (bond_type.begin(), bond_type.end(), bond_type.begin(), tolower);
    transform (bond_type.begin(), bond_type.end(), bond_type.begin(), ::tolower); //fix for G++
    
    Atom * origin_atom = 0;
    Atom * target_atom = 0;
    int atom_count = model().atom_count();

    if (origin_atom_id <= atom_count &&
        model().atom (origin_atom_id-1).kit().number() == origin_atom_id)
    {
        origin_atom = &model().atom (origin_atom_id-1);
    }
    else
    {
        for (int i=0;  i<atom_count; ++i)
            if (model().atom(i).kit().number() == origin_atom_id)
                origin_atom = &model().atom(i);
    }
    if (origin_atom == 0)
        throw Text (line.c_str()) + 
            "\nOrigin atom " + origin_atom_id + " is not found.";

    if (target_atom_id <= atom_count &&
        model().atom (target_atom_id-1).kit().number() == target_atom_id)
    {
        target_atom = &model().atom (target_atom_id-1);
    }
    else
    {
        for (int i=0;  i<atom_count; ++i)
            if (model().atom(i).kit().number() == target_atom_id)
                target_atom = &model().atom(i);
    }
    if (target_atom == 0)
        throw Text (line.c_str()) + 
            "\nTarget atom " + target_atom_id + " is not found.";

    char type;
    if      (bond_type == "1")
                                    type = 's';
    else if (bond_type == "2")
                                    type = 'd';
    else if (bond_type == "3")
                                    type = 't';
    else if (bond_type == "am")
                                    type = 'm';
    else if (bond_type == "ar")
                                    type = 'a';
    else if (bond_type == "du")
                                    type = 'y';
    else if (bond_type == "un")
                                    type = 'u';
    else if (bond_type == "nc")
                                    type = 'n';
    else throw Text (line.c_str()) + 
            "\n" + bond_type.c_str() + " is not a valid type."
            "\nValid types: 1, 2, 3, am, ar, du, un, nc.";


//    Bond &current_bond = new_bond (*origin_atom, *target_atom, Order(type));
    new_bond (*origin_atom, *target_atom, Order(type));

    //model(). add_bond (current_bond);
}

// Readers for the record types CENTER_OF_MASS through LSPLANE.
// The bodies are empty: data lines of these records are ignored.
void Mol2_file::read_CENTER_OF_MASS     (std::string & ) {}
void Mol2_file::read_CENTROID     (std::string & ) {}
void Mol2_file::read_COMMENT     (std::string & ) {}
void Mol2_file::read_CRYSIN     (std::string & ) {}
void Mol2_file::read_DICT     (std::string & ) {}
void Mol2_file::read_DATA_FILE     (std::string & ) {}
void Mol2_file::read_EXTENSION_POINT     (std::string & ) {}
void Mol2_file::read_FF_PBC     (std::string & ) {}
void Mol2_file::read_FFCON_ANGLE     (std::string & ) {}
void Mol2_file::read_FFCON_DIST     (std::string & ) {}
void Mol2_file::read_FFCON_MULTI     (std::string & ) {}
void Mol2_file::read_FFCON_RANGE     (std::string & ) {}
void Mol2_file::read_FFCON_TORSION     (std::string & ) {}
void Mol2_file::read_LINE     (std::string & ) {}
void Mol2_file::read_LSPLANE     (std::string & ) {}

// Handles one data line of a MOLECULE record. What the line means depends
// on its position within the record (section_data_line_counter_):
//   1 - stored as the model's file name
//   2 - must start with an integer (number of atoms); the value is not kept
//   3 - molecule type; an unknown type only produces a warning
//   4 - charge type; an unknown type is an error
//   5 - internal SYBYL status bits, ignored
//   6 - stored as the model's comment line and shown as status
//
// Throws Text on a malformed line 2, 3 or 4 and on any line past the sixth.
void Mol2_file::
read_MOLECULE (std::string & line)
{
    static const char * const molecule_types [] =
    {
        "SMALL", "PROTEIN", "NUCLEIC_ACID", "SACCHARIDE"
    };
    static const char * const * const molecule_types_end =
        molecule_types + sizeof (molecule_types) / sizeof (molecule_types [0]);

    static const char * const charge_types [] =
    {
        "USER_CHARGES",    "NO_CHARGES",    "DEL_RE",           "GASTEIGER",
        "GAST_HUCK",       "HUCKEL",        "PULLMAN",          "GAUSS80_CHARGES",
        "AMPAC_CHARGES",   "MULLIKEN_CHARGES", "DICT_CHARGES",  "MMFF94_CHARGES"
    };
    static const char * const * const charge_types_end =
        charge_types + sizeof (charge_types) / sizeof (charge_types [0]);

    std::istringstream fields (line);

    switch (section_data_line_counter_)
    {
    case 1:
        model().kit().set_file_name (line.c_str());
        return;

    case 2:
        {
        int atom_total;
        fields >> atom_total;
        if (fields.fail())
            throw Text (line.c_str()) + 
                "\nSecond MOLECULE data line should contain number of atoms.";
        }
        return;

    case 3:
        {
        std::string kind;
        fields >> kind;
        if (fields.fail())
            throw Text (line.c_str()) + 
                "\nThird MOLECULE data line should contain molecule type."
                "\nValid types: SMALL, PROTEIN, NUCLEIC_ACID, SACCHARIDE";

        const bool known_kind =
            std::find (molecule_types, molecule_types_end, kind) != molecule_types_end;

        // An unrecognised molecule type is tolerated, only reported.
        if (!known_kind)
            to_user().warning (Text ("Problem in line:\n") + line.c_str() +
                "\nThird MOLECULE data line should contain molecule type."
                "\nValid types: SMALL, PROTEIN, NUCLEIC_ACID, SACCHARIDE");
        }
        return;

    case 4:
        {
        std::string charges;
        fields >> charges;

        const bool known_charges =
            std::find (charge_types, charge_types_end, charges) != charge_types_end;

        if (fields.fail() || !known_charges)
            throw Text (line.c_str()) + 
                "\nFourth MOLECULE data line should contain "
                "the type of charges associated with the molecule."
                "\nValid types: NO_CHARGES, DEL_RE, GASTEIGER, GAST_HUCK, "
                "HUCKEL, PULLMAN, GAUSS80_CHARGES,\n"
                "AMPAC_CHARGES, MULLIKEN_CHARGES, DICT_CHARGES, "
                "MMFF94_CHARGES, USER_CHARGES";
        }
        return;

    case 5:
        // internal SYBYL status bits
        return;

    case 6:
        model().kit().set_comment_line (line.c_str());
        to_user().status (line.c_str());
        return;

    default:
        break;
    }

    throw Text ("MOLECULE record should consist of six data lines");
}

// Readers for the record types NORMAL through SET.
// The bodies are empty: data lines of these records are ignored.
void Mol2_file::read_NORMAL     (std::string & ) {}
void Mol2_file::read_QSAR_ALIGN_RULE     (std::string & ) {}
void Mol2_file::read_RING_CLOSURE     (std::string & ) {}
void Mol2_file::read_ROTATABLE_BOND     (std::string & ) {}
void Mol2_file::read_SEARCH_DIST     (std::string & ) {}
void Mol2_file::read_SEARCH_OPTIONS     (std::string & ) {}
void Mol2_file::read_SET     (std::string & ) {}

// Reader for SUBSTRUCTURE records.
// The body is empty: data lines of this record are ignored.
void Mol2_file::
read_SUBSTRUCTURE (std::string & )
{
    // Disabled debug trace:
    //log () << "read_SUBSTRUCTURE\n";
}

// Readers for U_FEAT, UNITY_ATOM_ATTR and UNITY_BOND_ATTR records.
// The bodies are empty: data lines of these records are ignored.
void Mol2_file::read_U_FEAT     (std::string & ) {}
void Mol2_file::read_UNITY_ATOM_ATTR     (std::string & ) {}
void Mol2_file::read_UNITY_BOND_ATTR     (std::string & ) {}

namespace 
{
    struct SYBYL_atom_type_holder
    {
        std::vector <Mol2_file::SYBYL_atom_type>    types;

        SYBYL_atom_type_holder ()
        {
            types.push_back (Mol2_file::SYBYL_atom_type ("H",       "13", "H",     "hydrogen"));
            types.push_back (Mol2_file::SYBYL_atom_type ("H",       "13", "H.spc", "hydrogen in Single Point Charge (SPC) water model"));
            types.push_back (Mol2_file::SYBYL_atom_type ("H",       "13", "H.t3p", "hydrogen in Transferable intermolecular Potential (TIP3P) water model"));
            
            types.push_back (Mol2_file::SYBYL_atom_type ("C",       "1 ", "C.3",   "carbon sp3"));
            types.push_back (Mol2_file::SYBYL_atom_type ("C",       "2 ", "C.2",   "carbon sp2"));
            types.push_back (Mol2_file::SYBYL_atom_type ("C",       "4 ", "C.1",   "carbon sp"));
            types.push_back (Mol2_file::SYBYL_atom_type ("C",       "3 ", "C.ar",  "carbon aromatic"));
            types.push_back (Mol2_file::SYBYL_atom_type ("C",       "33", "C.cat", "carbocation (C+) used only in a guadinium group"));
            
            types.push_back (Mol2_file::SYBYL_atom_type ("O",       "8 ", "O.3",   "oxygen sp3"));
            types.push_back (Mol2_file::SYBYL_atom_type ("O",       "9 ", "O.2",   "oxygen sp2"));
            types.push_back (Mol2_file::SYBYL_atom_type ("O",       "32", "O.co2", "oxygen in carboxylate and phosphate groups"));
            types.push_back (Mol2_file::SYBYL_atom_type ("O",       "8 ", "O.spc", "oxygen in Single Point Charge (SPC) water model"));
            types.push_back (Mol2_file::SYBYL_atom_type ("O",       "8 ", "O.t3p", "oxygen in Transferable Intermolecular Potential (TIP3P) water model"));
            
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "5 ", "N.3",   "nitrogen sp3"));
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "6 ", "N.2",   "nitrogen sp2"));
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "7 ", "N.1",   "nitrogen sp"));
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "11", "N.ar",  "nitrogen aromatic"));
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "28", "N.am",  "nitrogen amide"));
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "19", "N.pl3", "nitrogen trigonal planar"));
            types.push_back (Mol2_file::SYBYL_atom_type ("N",       "31", "N.4",   "nitrogen sp3 positively charged"));
            
            types.push_back (Mol2_file::SYBYL_atom_type ("S",       "10", "S.3",   "sulfur sp3"));
            types.push_back (Mol2_file::SYBYL_atom_type ("S",       "18", "S.2",   "sulfur sp2"));
            types.push_back (Mol2_file::SYBYL_atom_type ("S",       "29", "S.O",   "sulfoxide sulfur"));
            types.push_back (Mol2_file::SYBYL_atom_type ("S",       "30", "S.O2",  "sulfone sulfur"));
            //fix
            types.push_back (Mol2_file::SYBYL_atom_type ("S",       "29", "S.o",   "sulfoxide sulfur"));
            types.push_back (Mol2_file::SYBYL_atom_type ("S",       "30", "S.o2",  "sulfone sulfur"));

            types.push_back (Mol2_file::SYBYL_atom_type ("P",       "12", "P.3",   "phosphorous sp3"));
            types.push_back (Mol2_file::SYBYL_atom_type ("F",       "16", "F",     "fluorine"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Cl",      "15", "Cl",    "chlorine"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Br",      "14", "Br",    "bromine"));
            types.push_back (Mol2_file::SYBYL_atom_type ("I",       "17", "I",     "iodine"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Si",      "27", "Si",    "silicon"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Lp",      "20", "LP",    "lone pair"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Dummy",   "26", "Du",    "dummy"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Na",      "21", "Na",    "sodium"));
            types.push_back (Mol2_file::SYBYL_atom_type ("K",       "22", "K",     "potassium"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Ca",      "23", "Ca",    "calcium"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Li",      "24", "Li",    "lithium"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Al",      "25", "Al",    "aluminum"));

            // Quantitative Structure-Property Relationships
            types.push_back (Mol2_file::SYBYL_atom_type ("Any",     "- ", "Any",   "any atom (QSPR)"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Unknown", "- ", "Hal",   "halogen (QSPR)"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Unknown", "- ", "Het",   "heteroatom = N, O, S, P (QSPR)"));
            types.push_back (Mol2_file::SYBYL_atom_type ("Unknown", "- ", "Hev",   "heavy (non hydrogen) atom (QSPR)"));
        }
    };

    class Of_type : public std::unary_function<Mol2_file::SYBYL_atom_type,bool>
    {
        const std::string & atom_type_;

    public:
        bool operator ()(const Mol2_file::SYBYL_atom_type & SYBYL_type) const
            {return atom_type_ == SYBYL_type.code_;}

        explicit Of_type (const std::string & atom_type) : atom_type_(atom_type) {}
    }; 
}//namespace

std::vector <Mol2_file::SYBYL_atom_type> & Mol2_file::
atom_types ()
{
    static SYBYL_atom_type_holder holder;
    return holder.types;
}

bool Mol2_file::
is_not_a_SYBYL (const std::string & atom_type, Element & element) const
{
    Of_type of_type (atom_type);

    std::vector <Mol2_file::SYBYL_atom_type>::iterator begin = atom_types().begin();
    std::vector <Mol2_file::SYBYL_atom_type>::iterator end   = atom_types().end();
    std::vector <Mol2_file::SYBYL_atom_type>::iterator result = 
        std::find_if (begin, end, of_type);
    
    if (result == end)
    {
        if (Element::is(atom_type.c_str())) //fix warnign: not correct .mol2
        {
            element = Element (atom_type.c_str());
            return false;
        }
        else
            return true;
    }

    element = result->element_;
    return false;
}
}//MM



