From dcf85ba785776f3426519a919e2a2edf7cf9308b Mon Sep 17 00:00:00 2001
From: Dipti <dipti.dhond@candelatech.com>
Date: Thu, 25 Feb 2021 09:38:48 -0800
Subject: [PATCH] compare two dfs, small bugs but logic is now there

Signed-off-by: Dipti <dipti.dhond@candelatech.com>
---
 py-json/LANforge/lfcli_base.py        | 81 +++++++++++++++++++++++----
 py-scripts/test_ipv4_variable_time.py |  2 +-
 2 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/py-json/LANforge/lfcli_base.py b/py-json/LANforge/lfcli_base.py
index 4c2a0331..96f0fff1 100644
--- a/py-json/LANforge/lfcli_base.py
+++ b/py-json/LANforge/lfcli_base.py
@@ -662,20 +662,79 @@ class LFCliBase:
         if file_name.split('.')[-1] == 'csv':
             return pd.read_csv(file_name)
         
-   
+   #only works for test_ipv4_variable_time at the moment
     def compare_two_df(self,dataframe_one=None,dataframe_two=None):
+        #df one = current report
+        #df two = compared report
         pd.set_option("display.max_rows", None, "display.max_columns", None)
         #get all of common columns besides Timestamp, Timestamp milliseconds 
-        common_cols = set(dataframe_one.columns).intersection(set(dataframe_two.columns))
-        if common_cols is not None:
-            cols_to_remove=['Timestamp milliseconds epoch','Timestamp','LANforge GUI Build: 5.4.3']
-            #drop unwanted cols from df
-            dataframe_one = dataframe_one.drop(list(cols_to_remove), axis=1)
-            dataframe_two = dataframe_two.drop(list(cols_to_remove), axis=1)
-            #for time elapsed section and endpoint name combo
-            #
-            #print(dataframe_one)
-            #print(dataframe_two)
+        common_cols = list(set(dataframe_one.columns).intersection(set(dataframe_two.columns)))
+        cols_to_remove = ['Timestamp milliseconds epoch','Timestamp','LANforge GUI Build: 5.4.3']
+        com_cols = [i for i in common_cols if i not in cols_to_remove]
+        #check if dataframes have the same endpoints
+        if dataframe_one.name.unique().tolist().sort() == dataframe_two.name.unique().tolist().sort():
+            endpoint_names =  dataframe_one.name.unique().tolist()
+            if com_cols is not None:
+                dataframe_one = dataframe_one[[c for c in dataframe_one.columns if c in com_cols]]
+                dataframe_two = dataframe_two[[c for c in dataframe_one.columns if c in com_cols]]
+                dataframe_one = dataframe_one.loc[:, ~dataframe_one.columns.str.startswith('Script Name:')]
+                dataframe_two = dataframe_two.loc[:, ~dataframe_two.columns.str.startswith('Script Name:')]
+                lowest_duration=min(dataframe_one['Duration elapsed'].max(),dataframe_two['Duration elapsed'].max())
+                print("The max duration in the new dataframe will be... " + str(lowest_duration))
+
+                compared_values_dataframe = pd.DataFrame(columns=[col for col in com_cols if not col.startswith('Script Name:')])
+                cols = compared_values_dataframe.columns.tolist()
+                cols=sorted(cols, key=lambda L: (L.lower(), L))
+                compared_values_dataframe= compared_values_dataframe[cols]
+                print(compared_values_dataframe)
+                for duration_elapsed in range(lowest_duration):
+                    for endpoint in endpoint_names:
+                        #check if value has a space in it or is a str. 
+                        # if value as a space, only take value before space for calc, append that calculated value after space.
+                        #if str. check if values match from 2 df's. if values do not match, write N/A
+                        for_loop_df1 = dataframe_one.loc[(dataframe_one['name'] == endpoint) & (dataframe_one['Duration elapsed'] == duration_elapsed)]
+                        for_loop_df2 = dataframe_two.loc[(dataframe_one['name'] == endpoint) & (dataframe_two['Duration elapsed'] == duration_elapsed)]
+                       # print(for_loop_df1)
+                       # print(for_loop_df2)
+                        cols_to_loop = [i for i in com_cols if i not in ['Duration elapsed', 'Name', 'Script Name: test_ipv4_variable_time']]
+                        cols_to_loop=sorted(cols_to_loop, key=lambda L: (L.lower(), L))
+                        print(cols_to_loop)
+                        row_to_append={}
+                        row_to_append["Duration elapsed"] = duration_elapsed
+                        for col in cols_to_loop:
+                            print(col)
+                            print(for_loop_df1)
+                            #print(for_loop_df2)
+                            print(for_loop_df1.at[0, col])
+                            print(for_loop_df2.at[0, col])
+                            if type(for_loop_df1.at[0, col]) == str and type(for_loop_df2.at[0, col]) == str:
+                                if (' ' in for_loop_df1.at[0,col]) == True:
+                                    #do subtraction
+                                    new_value = float(for_loop_df1.at[0, col].split(" ")[0]) - float(for_loop_df2.at[0, col].split(" ")[0])
+                                    #add on last half of string
+                                    new_value = str(new_value)+ for_loop_df2.at[0, col].split(" ")[1]
+                                    # print(new_value)
+                                    row_to_append[col] = new_value
+                                else:
+                                    if for_loop_df1.at[0, col] != for_loop_df2.at[0, col]:
+                                        row_to_append[col] = 'NaN'
+                                    else:
+                                        row_to_append[col] = for_loop_df1.at[0,col]
+                            elif type(for_loop_df1.at[0, col]) == int and type(for_loop_df2.at[0, col]) == int or type(for_loop_df1.at[0, col]) == float and type(for_loop_df2.at[0,col]) == float:
+                                new_value = for_loop_df1.at[0, col] - for_loop_df2.at[0, col]
+                                row_to_append[col] = new_value
+                        compared_values_dataframe = compared_values_dataframe.append(row_to_append, ignore_index=True,)
+                        print(compared_values_dataframe)
+                    #add col name to new df
+                print(dataframe_one)
+                print(dataframe_two)
+                print(compared_values_dataframe)
+            else:
+                ValueError("Unable to execute report comparison due to inadequate file commonalities. ")   
+                exit(1)
+        else:
+            ValueError("Two files do not have the same endpoints. Please try file comparison with files that have the same endpoints.")
+            exit(1)
     
         
         #take those columns and separate those columns from others in DF.
diff --git a/py-scripts/test_ipv4_variable_time.py b/py-scripts/test_ipv4_variable_time.py
index 18783844..04a77e09 100755
--- a/py-scripts/test_ipv4_variable_time.py
+++ b/py-scripts/test_ipv4_variable_time.py
@@ -197,7 +197,7 @@ python3 ./test_ipv4_variable_time.py
     --output_format csv
     --report_file ~/Documents/results.csv                       (Example of csv file output  - please use another extension for other file formats)
     --compared_report ~/Documents/results_prev.csv              (Example of csv file retrieval - please use another extension for other file formats) - UNDER CONSTRUCTION
-    --layer3_cols'name','tx bytes','rx bytes','dropped'          (column names from the GUI to print on report -  please read below to know what to put here according to preferences)
+    --layer3_cols 'name','tx bytes','rx bytes','dropped'          (column names from the GUI to print on report -  please read below to know what to put here according to preferences)
     --port_mgr_cols 'ap','ip'                                    (column names from the GUI to print on report -  please read below to know what to put here according to preferences)
     --debug
 ===============================================================================