/* This software was developed within the context of the project Metaphor for Science
	Museums (Mesmuses) in the framework of the Information Society Technology
	Programme, part of the Fifth Research And Technological Development
	Framework Programme of the European Community, under the Consortium
	Agreement dated <June 30, 2001>, between INRIA, Finsiel Spa, IMSS,
	Valoris , CSI, ICS-FORTH, ENSTB, EDW International.
*/
//Optimize not keep path


/*
 *  CHANGE LOG:
 *  Each entry starts with a date, followed by who made the change and what was done,
 *  separated by ':'.
 *
 *  03.12.2002 : Sofia Alexaki: Changed the way we count the paths. A path is now
 * 			considered to be EVERY DISTINCT route starting from the root
 *	     (not only the routes ending in the leaf classes).
 */
package gr.forth.ics.vrp.statistics;

import java.util.*;
import gr.forth.ics.vrp.corevrp.model.*;
import gr.forth.ics.vrp.corevrp.*;
import gr.forth.ics.vrp.corevrp.vocabulary.*;

/**
 * Computes & Reports statistics based on the model & the  RDF_DAG for
 * subclass and subproperty hierarchies constructed by VRP.
 *
 * @author Sofia Alexaki
 * @version 2.5  (30 January 2003)
 */
public class RDFModelStatistics {


	/** The RDF model whose classes and data resources are analysed. */
	private Model mdl;

	/**
	 * Hierarchy map taken from the RDF_DAG handed to
	 * {@link #printHierarchyStatistics}; stays null until that method has run.
	 */
	private HashMap dag;

	/**
	 * Creates a statistics reporter bound to the given model.
	 *
	 * @param mdl the model that statistics are computed from
	 */
	public RDFModelStatistics(Model mdl) {
		this.mdl = mdl;
	}

	/**
	 * Counts the instances of each class.
	 * <p>
	 * Every class returned by <code>mdl.getClasses()</code> starts with a count
	 * of zero. Each data resource that is an RDF_Resource adds one to every
	 * class listed by its <code>gettype()</code>; every other data resource is
	 * counted as an instance of rdfs:Resource. A class that is referenced as a
	 * type but was not among the model's classes is added to the result on
	 * first use instead of causing a NullPointerException.
	 *
	 * @return a map whose keys are the classes and whose values are Integer
	 *         instance counts
	 */
	public HashMap instancesPerClass() {
		Collection schemanodes = mdl.getClasses();
		Collection resnodes = mdl.getDataResources();
		HashMap instancesCounter = new HashMap();
		Resource rdfs_resource = mdl.getResource(rdfschema.Resource);
		for (Iterator it = schemanodes.iterator(); it.hasNext(); ) {
			instancesCounter.put((RDF_Class)it.next(), new Integer(0));
		}
		for (Iterator it1 = resnodes.iterator(); it1.hasNext(); ) {
			Object obj = it1.next();
			if (obj instanceof RDF_Resource) {
				ArrayList types = ((RDF_Resource)obj).gettype();
				if (types == null) {
					continue; // nothing to count for a resource without a type list
				}
				for (Iterator it2 = types.iterator(); it2.hasNext(); ) {
					incrementCount(instancesCounter, (RDF_Class)it2.next());
				}
			} else {
				// Resources that are not RDF_Resource objects are counted under
				// rdfs:Resource; create a stand-in class if the model has none.
				if (rdfs_resource == null) {
					rdfs_resource = new RDF_Class(rdfschema.Resource);
				}
				incrementCount(instancesCounter, rdfs_resource);
			}
		}
		return instancesCounter;
	}

	/**
	 * Adds one to the Integer counter stored under <code>key</code>, treating a
	 * missing entry as zero.
	 */
	private static void incrementCount(HashMap counters, Object key) {
		Integer current = (Integer)counters.get(key);
		int previous = (current == null) ? 0 : current.intValue();
		counters.put(key, new Integer(previous + 1));
	}

	/**
	 * Builds a histogram of the per-class instance counts produced by
	 * {@link #instancesPerClass()}.
	 *
	 * @return a map whose keys are instance counts (Integer) and whose values
	 *         are the number of classes (Integer) having exactly that many
	 *         instances
	 */
	public HashMap getInstanceDistribution() {
		HashMap histogram = new HashMap();
		Iterator counts = instancesPerClass().values().iterator();
		while (counts.hasNext()) {
			Integer instances = (Integer)counts.next();
			Integer classesSoFar = (Integer)histogram.get(instances);
			// First class seen with this count starts the bucket at one.
			int updated = (classesSoFar == null) ? 1 : classesSoFar.intValue() + 1;
			histogram.put(instances, new Integer(updated));
		}
		return histogram;
	}

	/**
	 * Prints the instance distribution computed by
	 * {@link #getInstanceDistribution()}: one row per distinct instance count,
	 * followed by the number of classes that have that many instances.
	 */
	public void printInstanceDistribution() {
		HashMap instdist = getInstanceDistribution();
		Output out = new Output();
		out.out("\nInstance Distribution\n", true);
		out.out("-------------------------\n", true);
		out.out("Instances/Class" + "  " + "Number of Classes\n", true);
		// Iterate the entries so every key is printed with its own value rather
		// than relying on two separate iterators staying in step.
		for (Iterator it = instdist.entrySet().iterator(); it.hasNext(); ) {
			Map.Entry row = (Map.Entry)it.next();
			Integer instances = (Integer)row.getKey();
			Integer classes = (Integer)row.getValue();
			out.out(instances.toString() + "\t\t" + classes.toString() + "\n", true);
		}
	}


	/**
	 * Collects hierarchy statistics over all property roots and prints them as
	 * a single "SchemaProperty" hierarchy. All roots share one counter array
	 * and one visited list, so the figures are totals across the whole set.
	 *
	 * @param roots the root properties (RDF_Property) to start traversing from
	 */
	private void schemaPropertyHierarchyStatistics(HashSet roots) {
		int[] counters = new int[9];
		ArrayList seen = new ArrayList();
		Iterator rootIt = roots.iterator();
		while (rootIt.hasNext()) {
			RDF_Property rootProperty = (RDF_Property)rootIt.next();
			String rootId = rootProperty.getID();
			traverseHierarchy(rootProperty, 0, rootId, counters, seen);
		}
		formatHierarchyStatistics(counters, "Hierarchy: SchemaProperty (a dump class that is considered root of all schema properties) ");
	}

    /**
     * Reports statistics for one kind of class hierarchy by temporarily
     * hanging the matching DAG roots under a freshly created root class.
     * <p>
     * A new RDF_Class is created from <code>rootClass</code>, every current
     * root selected by <code>classHierType</code> is passed to
     * <code>rdf_dag.enter(cls, root)</code>, the statistics and both relative
     * distributions are printed, and finally the temporary entries are removed
     * again and <code>rdf_dag.findRoots()</code> is called. The clean-up runs
     * in a <code>finally</code> block so the DAG is restored even if reporting
     * fails. Nothing is printed when no root matches.
     *
     * @param rdf_dag       the class hierarchy DAG
     * @param rootClass     identifier used to create the temporary root class
     * @param classHierType 0 = roots that are not RDF_MetaClass,
     *                      1 = RDF_MetaClass roots of kind META_CLASS,
     *                      2 = RDF_MetaClass roots of kind META_PROPERTY
     */
    private void computePartDistribution(RDF_DAG rdf_dag, String rootClass, int classHierType) {
        RDF_Class rdf_class = new RDF_Class(rootClass);
        // Work on a snapshot: enter()/remove() are called on the DAG while iterating.
        HashSet roots = (HashSet)rdf_dag.getroots();
        HashSet tmpRoots = (HashSet)roots.clone();
        ArrayList attached = new ArrayList();
        try {
            for (Iterator it = tmpRoots.iterator(); it.hasNext(); ) {
                RDF_Class cls = (RDF_Class)it.next();
                if (belongsToHierarchy(cls, classHierType)) {
                    rdf_dag.enter(cls, rdf_class);
                    attached.add(cls);
                }
            }
            // Only report when at least one class of the requested hierarchy exists.
            if (!attached.isEmpty()) {
                int[] statistics = new int[9];
                ArrayList visited = new ArrayList();
                traverseHierarchy(rdf_class, 0, rdf_class.getID(), statistics, visited);
                formatHierarchyStatistics(statistics, "Hierarchy: "  + rdf_class.getID());
                relativeDistribution(rdf_dag, 0, 0, rdf_class);
                relativeDistribution(rdf_dag, 0, 1, rdf_class);
            }
        } finally {
            if (!attached.isEmpty()) {
                for (Iterator it = attached.iterator(); it.hasNext(); ) {
                    rdf_dag.remove((RDF_Class)it.next(), rdf_class);
                }
                rdf_dag.remove(rdf_class);
                rdf_dag.findRoots();
            }
        }
    }

    /**
     * Tells whether a root class belongs to the hierarchy selected by
     * <code>classHierType</code> (see {@link #computePartDistribution}).
     */
    private static boolean belongsToHierarchy(RDF_Class cls, int classHierType) {
        boolean isMeta = cls instanceof RDF_MetaClass;
        switch (classHierType) {
            case 0:
                return !isMeta;
            case 1:
                return isMeta && ((RDF_MetaClass)cls).getKind() == RDF_MetaClass.META_CLASS;
            case 2:
                return isMeta && ((RDF_MetaClass)cls).getKind() == RDF_MetaClass.META_PROPERTY;
            default:
                return false;
        }
    }

	/**
	 * Prints the statistics for every hierarchy contained in the RDF_DAG.
	 * <p>
	 * For a class DAG three hierarchies are reported in this order: the one
	 * rooted at rdfs:Class, the one rooted at rdf:Property and the one rooted
	 * at rdfs:Resource. For a property DAG all root properties are reported
	 * together. Each report contains the node count, maximum and average number
	 * of subnodes and supernodes, maximum and average depth, number of paths,
	 * and the descendant and ancestor distributions. Nothing is printed when
	 * the DAG's hierarchy map is null or empty.
	 * <p>
	 * As a side effect the hierarchy map of <code>rdf_dag</code> is stored in
	 * this object for use by the traversal.
	 *
	 * @param rdf_dag   the RDF_DAG that represents the hierarchy
	 * @param classHier if true the rdf_dag contains the class hierarchy,
	 *                  otherwise it contains the property hierarchy
	 */
	public void printHierarchyStatistics(RDF_DAG rdf_dag, boolean classHier) {
		this.dag = rdf_dag.getdag();
		if (dag == null || dag.size() == 0) {
			return;
		}
		if (classHier) {
			reportClassHierarchy(rdf_dag, rdfschema.Class, 1);
			reportClassHierarchy(rdf_dag, rdf.Property, 2);
			reportClassHierarchy(rdf_dag, rdfschema.Resource, 0);
		} else { // Property hierarchy
			schemaPropertyHierarchyStatistics((HashSet)rdf_dag.getroots());
			relativeDistribution(rdf_dag, 1, 0, null);
			relativeDistribution(rdf_dag, 1, 1, null);
		}
	}

	/**
	 * Reports one class hierarchy. When the model holds <code>rootUri</code> as
	 * an RDF_Class the hierarchy is traversed from that class; otherwise the
	 * report is delegated to {@link #computePartDistribution}, which builds a
	 * temporary root for the roots selected by <code>classHierType</code>.
	 */
	private void reportClassHierarchy(RDF_DAG rdf_dag, String rootUri, int classHierType) {
		Resource resource = mdl.getResource(rootUri);
		if (resource instanceof RDF_Class) {
			RDF_Class root = (RDF_Class)resource;
			// Fresh counters per hierarchy so the reports do not leak into each other.
			int[] statistics = new int[9];
			ArrayList visited = new ArrayList();
			traverseHierarchy(root, 0, root.getID(), statistics, visited);
			formatHierarchyStatistics(statistics, "Hierarchy: "  + root.getID());
			relativeDistribution(rdf_dag, 0, 0, root);
			relativeDistribution(rdf_dag, 0, 1, root);
		} else {
			computePartDistribution(rdf_dag, rootUri, classHierType);
		}
	}

  /**
   * Prints one hierarchy report from the counter array filled by the traversal.
   * The label and the node count are always printed; the remaining lines are
   * printed only when the hierarchy has more than one node.
   * <p>
   * Counter layout as used here: [0] maximum subnodes, [1] total subnodes,
   * [2] node count, [3] maximum depth, [4] total depth, [5] number of paths,
   * [6] maximum supernodes, [7] total supernodes, [8] nodes with more than one
   * supernode.
   *
   * @param statistics the nine counters described above
   * @param label      heading line printed before the figures
   */
  private void formatHierarchyStatistics(int[] statistics, String label) {
    Output out = new Output();
    int nodeCount = statistics[2];
    out.outln(label, true);
    out.outln("\tNumber of hierarchy nodes: " +  nodeCount, true);
    if (nodeCount <= 1) {
      return;
    }

    // nodeCount is at least 2 from here on, so dividing by it is safe.
    int pathCount = statistics[5];
    double avgSubnodes = (double)statistics[1] / nodeCount;
    double avgDepth = (pathCount != 0) ? (double)statistics[4] / pathCount : 0;
    double avgSupernodes = (double)statistics[7] / nodeCount;
    // NOTE(review): labelled "percentage" but printed as a fraction of the node count.
    double multiParentShare = (double)statistics[8] / nodeCount;

    out.outln("\tMaximum number of subnodes: " +  statistics[0], true);
    out.outln("\tAverage number of subnodes: " +  avgSubnodes, true);
    out.outln("\tMaximum Depth: " + statistics[3], true);
    out.outln("\tAverage Depth: " + avgDepth, true);
    out.outln("\tNumber of paths: " +  pathCount, true);
    out.outln("\tMaximum number of supernodes: " +  statistics[6], true);
    out.outln("\tAverage number of supernodes: " +  avgSupernodes, true);
    out.outln("\tNodes percentage having more than one supernode: " + multiParentShare, true);
  }

	/**
	 * Traverses the hierarchy top-down from <code>node</code> and accumulates
	 * the statistics. A node reachable along several routes is traversed once
	 * per route, so depth and path counters reflect every distinct route from
	 * the root, while the per-node counters are updated only on the first visit.
	 * <p>
	 * Counters: [0] maximum subnodes, [1] total subnodes, [2] node count,
	 * [3] maximum depth, [4] total depth, [5] number of paths, [6] maximum
	 * supernodes, [7] total supernodes, [8] nodes with more than one supernode.
	 *
	 * @param node       the node to process; ignored when it has no entry in the hierarchy map
	 * @param level      depth of <code>node</code> below the starting root (root = 0)
	 * @param path       unused; kept so existing callers keep compiling. It is
	 *                   passed down unchanged instead of being extended on each
	 *                   step, because nothing ever read the concatenated value
	 * @param statistics the nine counters updated in place
	 * @param visited    nodes already counted, updated in place
	 */
	private void traverseHierarchy (Object node, int level, String path, int[] statistics, ArrayList visited ) {
		ArrayList al = (ArrayList)dag.get(node);
		if (al == null)
			return;
		HashSet subnodes = (HashSet)al.get(0);
		HashSet supernodes = (HashSet)al.get(1);
		int subs = subnodes.size();
		int supers = supernodes.size();
		// Node may be already visited because of multi-inheritance in the hierarchy
		if ( !visited.contains(node) ) {
			visited.add(node);
			if (statistics[0] < subs )
				statistics[0] = subs;  // Set max number of subnodes
			statistics[1] += subs;
			statistics[2]++; // Increase number of hierarchy nodes
			if (statistics[6] < supers )
				statistics[6] = supers;  // Set max number of supernodes
			statistics[7] += supers;  // Increase the super links
			if (supers > 1)
				statistics[8]++; // Increase the number of multi-inherited nodes
		}
		if (statistics[3] < level )
			statistics[3] = level;  // Set max path length
		statistics[4] += level;
		// Every time a node is met we add the number of its subnodes
		statistics[5] += subs;
		for (Iterator it = subnodes.iterator(); it.hasNext(); ) {
			traverseHierarchy(it.next(), level+1, path, statistics, visited);
		}
	}

  /**
   * Prints how many nodes of the hierarchy have a given number of descendants
   * or ancestors.
   * <p>
   * The method first calls <code>getAllDescendants</code> (for the class root,
   * or for every root property) and <code>getAllAncestors</code> on the DAG,
   * which presumably fill in the per-node counts read below (TODO confirm
   * against RDF_DAG). It then reads, for every node entry in the DAG's
   * hierarchy map, the Integer at index 2 (descendants) or index 3 (ancestors)
   * and prints one row per distinct value. Entries that carry no such count
   * are skipped.
   * <p>
   * The hierarchy map is taken from <code>rdf_dag</code> itself, so the method
   * can be called without a prior call to printHierarchyStatistics.
   *
   * @param rdf_dag   the DAG to analyse
   * @param hierType  0 for a class hierarchy, any other value for a property hierarchy
   * @param relKind   0 to report descendants, any other value to report ancestors
   * @param classRoot root class whose descendants are computed; only used when
   *                  <code>hierType</code> is 0
   */
  public void relativeDistribution(RDF_DAG rdf_dag, int hierType, int relKind, Resource classRoot) {
    if ( hierType == 0 ) {
      rdf_dag.getAllDescendants(classRoot);
    } else {
      HashSet roots = (HashSet)rdf_dag.getroots();
      for (Iterator it1 = roots.iterator(); it1.hasNext(); ) {
        RDF_Property root_property = (RDF_Property)it1.next();
        rdf_dag.getAllDescendants(root_property);
      }
    }
    rdf_dag.getAllAncestors();

    // Read the counts from the DAG that was just processed, not from state
    // left behind by an earlier call on a possibly different DAG.
    HashMap hierarchy = rdf_dag.getdag();
    int slot = (relKind == 0) ? 2 : 3;
    HashMap dist = new HashMap();
    if (hierarchy != null) {
      for (Iterator it = hierarchy.values().iterator(); it.hasNext(); ) {
        ArrayList al = (ArrayList)it.next();
        if (al == null || al.size() <= slot) {
          continue; // no count stored for this node
        }
        Integer relNum = (Integer)al.get(slot);
        if (relNum == null) {
          continue; // count was never computed for this node
        }
        Integer tmp = (Integer)dist.get(relNum);
        if (tmp == null) {
          dist.put(relNum, new Integer(1));
        } else {
          dist.put(relNum, new Integer(tmp.intValue() + 1));
        }
      }
    }

    Output out = new Output();
    if (relKind == 0)
      out.out("Number of descendants", true);
    else
      out.out("Number of ancestors", true);
    if ( hierType == 0 )
      out.out("   Number of Classes\n", true);
    else
      out.out("   Number of Properties\n", true);
    for (Iterator it1 = dist.entrySet().iterator(); it1.hasNext(); ) {
      Map.Entry row = (Map.Entry)it1.next();
      out.out(((Integer)row.getKey()).toString() + "\t\t" + ((Integer)row.getValue()).toString() + "\n", true);
    }
  }
}


                /*  RDF_Class rdfs_resource = new RDF_Class(rdfschema.Resource);
                  //find the schema roots
                  HashSet tmpRoots = (HashSet)roots.clone();
                  int classRootNum = 0;
            	  for (Iterator it = tmpRoots.iterator(); it.hasNext(); ) {
		    RDF_Class cls = (RDF_Class)it.next();
                    System.out.println(cls.getID());
                    if (! (cls instanceof RDF_MetaClass) ) {
                      classRootNum++;
                      rdf_dag.enter(cls, rdfs_resource);
                    }
                  }
                  if (classRootNum !=0 ) {
                  traverseHierarchy(rdfs_resource, 0, rdfs_resource.getID(), statistics, visited);
                  formatHierarchyStatistics(statistics, "Hierarchy: "  + rdfs_resource.getID());
                  relativeDistribution(rdf_dag, 0, 0, rdfs_resource);
		  relativeDistribution(rdf_dag, 0, 1, rdfs_resource);
              	  for (Iterator it = tmpRoots.iterator(); it.hasNext(); ) {
		    RDF_Class cls = (RDF_Class)it.next();
                    if (! (cls instanceof RDF_MetaClass) ) {
                      rdf_dag.remove(cls, rdfs_resource);
                    }
                   }
                  rdf_dag.remove(rdfs_resource);
                  rdf_dag.findRoots();
                  }
                }*/
/*	private void hierarhcyStatistics(RDF_Class root, HashSet roots, Model mdl) {
		int[] statistics = {0, 0, 0, 0, 0, 0, 0, 0, 0 };
		ArrayList visited = new ArrayList();
		ArrayList al = (ArrayList)dag.get(root);
		HashSet subnodes = (HashSet)al.get(0);
		for (Iterator it1 = subnodes.iterator(); it1.hasNext(); ) {
			RDF_Class subnode = (RDF_Class)it1.next();
			System.out.println("traverseHierarchy " +subnode.getID());
			traverseHierarchy(subnode, 0, subnode.getID(), statistics, visited);
		}
		Resource rdf_resource = mdl.getResource(rdfschema.Resource);
		if (root == rdf_resource) {   //Ierarxia Resource
			Resource rdfs_class = mdl.getResource(rdfschema.Class);
			Resource rdf_property = mdl.getResource(rdf.Property);
			for (Iterator it1 = roots.iterator(); it1.hasNext(); ) {
				RDF_Class rdf_class = (RDF_Class)it1.next();
				if ( (rdf_class != rdf_resource) && (rdf_class != rdfs_class)
					  && (rdf_class != rdf_property)  ) {
					 if (! (rdf_class.getID().startsWith(rdf.NS) || rdf_class.getID().startsWith(rdfschema.NS)
					 || rdf_class.getID().startsWith(XMLSchema_DT.NS) )   ) {
						System.out.println("traverseHierarchy " + rdf_class.getID());
						traverseHierarchy(rdf_class, 0, rdf_class.getID(), statistics, visited);
					 }
				}
			}
		}
		formatHierarchyStatistics(statistics, "Hierarchy: "  + root.getID());
	}*/
