From d7d54b0e14dab1d1cf2fde5f5da8ed8f0eab1884 Mon Sep 17 00:00:00 2001 From: Chuck SmileyRekiere Date: Sun, 6 Jun 2021 07:47:26 -0600 Subject: [PATCH] lf_pdf_search.py : start of preprocessing the text file before writing out to csv Signed-off-by: Chuck SmileyRekiere --- py-scripts/sandbox/lf_pdf_search.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/py-scripts/sandbox/lf_pdf_search.py b/py-scripts/sandbox/lf_pdf_search.py index cd1de22d..1c5cb42c 100755 --- a/py-scripts/sandbox/lf_pdf_search.py +++ b/py-scripts/sandbox/lf_pdf_search.py @@ -54,7 +54,9 @@ class lf_pdf_search(): self.stdout_log = "" self.stderr_log_txt = "" self.stderr_log = "" + self.processed_log_txt = "" self.dataframe = "" + self.pdf_search_csv = "" def get_data(self): @@ -94,17 +96,19 @@ class lf_pdf_search(): # the data mainupulation may be done in other manners def datafile_to_dataframe(self): # note the error_bad_lines=False will miss one of the lines + delimiter_list = [':'] try: self.dataframe = pd.read_csv(self.stdout_log_txt, delimiter = [':']) + #self.dataframe = pd.read_csv(self.stdout_log_txt, sep = ':') except: print("one of the files may have a SN: in it need to correct ") self.dataframe = pd.read_csv(self.stdout_log_txt, delimiter = ':', error_bad_lines=False) #print(self.dataframe) print("saving data to .csv") # this removes the extention of .txt - pdf_search_csv= self.stdout_log_txt[:-4] - pdf_search_csv = pdf_search_csv + ".csv" - pdf_search_csv = self.dataframe.to_csv(pdf_search_csv,mode='w',index=False) + self.pdf_search_csv= self.stdout_log_txt[:-4] + self.pdf_search_csv = self.pdf_search_csv + ".csv" + self.pdf_search_csv = self.dataframe.to_csv(self.pdf_search_csv,mode='w',index=False) def main():