import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import sys

# Cluster the measurements stored in Output_Matrix.csv around `n` centroids.
#
# Usage: python <this script> <n>
#
# The input table must carry the measurement names in its first column
# (read by pandas as "Unnamed: 0") and have one column per measurement name;
# the cell values are treated as correlations between row and column.
# The resulting groups are written to ClusterGroups.csv.

NAME_COLUMN = "Unnamed: 0"
INPUT_PATH = "Output_Matrix.csv"
OUTPUT_PATH = "ClusterGroups.csv"
MAX_ITERATIONS = 250


def _assign_clusters(dataset, centroids):
    """Group every measurement under the centroid it correlates with most.

    Returns one list per centroid (same order as ``centroids``); each entry
    is ``[measurement_name, str(correlation_with_that_centroid)]``.  When a
    row has several equal maxima the first centroid wins.
    """
    names = dataset[NAME_COLUMN].values.tolist()
    rows = dataset[centroids].values.tolist()
    clusters = [[] for _ in centroids]
    for name, row in zip(names, rows):
        strongest = max(row)
        clusters[row.index(strongest)].append([name, str(strongest)])
    return clusters


def _row_positions(dataset):
    """Map each measurement name to the row position(s) where it appears."""
    positions = {}
    for position, name in enumerate(dataset[NAME_COLUMN].values.tolist()):
        positions.setdefault(name, []).append(position)
    return positions


def _pick_centroid(dataset, positions, members, fallback):
    """Return the member with the highest summed correlation to the others.

    Every member is scored, including the last one.  The running best starts
    at -inf, so a single-member cluster (score 0) or a cluster whose scores
    are all negative still yields a real measurement name.  ``fallback`` is
    returned only when no member produces a comparable score (empty cluster
    or NaN-only totals).  Ties keep the earliest member.
    """
    best_name = fallback
    best_total = float("-inf")
    for candidate in members:
        column = dataset[candidate].values.tolist()
        total = 0.0
        for other in members:
            if other != candidate:
                for position in positions[other]:
                    total += float(column[position])
        if total > best_total:
            best_name, best_total = candidate, total
    return best_name


def find_clusters(dataset, centroids, iterations=MAX_ITERATIONS):
    """Iteratively refine ``centroids`` and return ``(centroids, clusters)``.

    Each round assigns all measurements to their best-matching centroid and
    then replaces every centroid with the best-connected member of its
    cluster.  ``clusters`` is the assignment made in the last executed round
    and ``centroids`` the names chosen from it.  The input list is not
    modified.
    """
    current = list(centroids)
    clusters = [[] for _ in current]
    positions = _row_positions(dataset)
    for _ in range(iterations):
        clusters = _assign_clusters(dataset, current)
        updated = [
            _pick_centroid(
                dataset, positions, [entry[0] for entry in cluster], previous
            )
            for cluster, previous in zip(clusters, current)
        ]
        converged = updated == current
        current = updated
        if converged:
            # The procedure is deterministic, so further rounds would only
            # reproduce this exact assignment.
            break
    return current, clusters


def _write_clusters(path, centroids, clusters):
    """Write each centroid header, its members, and a separator row as CSV."""
    with open(path, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for centroid, members in zip(centroids, clusters):
            writer.writerow(["Centriod Name-> " + str(centroid)])
            writer.writerows(members)
            writer.writerow(["\n"])


def main(argv=None):
    """Read the matrix, cluster it around randomly seeded centroids, save it."""
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit(f"usage: {sys.argv[0]} <number-of-centroids>")
    n = int(args[0])
    if n < 1:
        sys.exit("the number of centroids must be at least 1")

    dataset = pd.read_csv(INPUT_PATH)
    # Seed the search with `n` randomly chosen measurements.
    seeds = dataset.sample(n)[NAME_COLUMN].values.tolist()
    centroids, clusters = find_clusters(dataset, seeds)
    _write_clusters(OUTPUT_PATH, centroids, clusters)
    print("done")


if __name__ == "__main__":
    main()