mirror of
				https://github.com/Telecominfraproject/wlan-lanforge-scripts.git
				synced 2025-11-03 20:27:54 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			146 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			146 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/python3
 | 
						|
 | 
						|
'''
 | 
						|
NAME:
 | 
						|
lf_pdf_search.py
 | 
						|
 | 
						|
PURPOSE:
 | 
						|
lf_pdf_search.py will run a pdf grep looking for specific information in pdf files 
 | 
						|
"pdfgrep -r --include 'ASA*.pdf' 'ASA End Date'" 
 | 
						|
 | 
						|
EXAMPLE:
 | 
						|
lf_pdf_search.py
 | 
						|
 | 
						|
NOTES:
 | 
						|
1. copy lf_pdf_search.py to a directory that has the pdf information
 | 
						|
 | 
						|
TO DO NOTES:
 | 
						|
 | 
						|
 | 
						|
'''
 | 
						|
import datetime
 | 
						|
import pprint
 | 
						|
import sys
 | 
						|
# This script only supports Python 3; stop early with a clear message
# otherwise.  sys.exit is used instead of the site-provided exit() builtin
# (which is absent when the site module is not loaded), and a non-zero
# status is returned so callers can detect the failure.
if sys.version_info[0] != 3:
    print("This script requires Python3")
    sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
import os
 | 
						|
import socket
 | 
						|
import logging
 | 
						|
import time
 | 
						|
from time import sleep
 | 
						|
import argparse
 | 
						|
import json
 | 
						|
import configparser
 | 
						|
import subprocess
 | 
						|
import csv
 | 
						|
import shutil
 | 
						|
import os.path
 | 
						|
import xlsxwriter
 | 
						|
import re
 | 
						|
import pandas as pd
 | 
						|
 | 
						|
 | 
						|
class lf_pdf_search:
    """Run ``pdfgrep`` for 'ASA End Date' in ASA*.pdf files and export the hits.

    Typical use is ``get_data()`` followed by ``datafile_to_dataframe()``:
    the first writes the raw pdfgrep output to a text file, the second
    parses that text file as ':'-separated data and saves it as a .csv.
    """

    def __init__(self):
        self.renewal_info = ""
        # seconds to wait for pdfgrep before it is terminated
        self.timeout = 10
        # base name used for every file this class generates
        self.outfile = "pdf_search"
        # set by get_data(): "SUCCESS" or "TIMEOUT"
        self.result = ""
        # path / file object of the captured pdfgrep stdout
        self.stdout_log_txt = ""
        self.stdout_log = ""
        # path / file object of the captured pdfgrep stderr
        self.stderr_log_txt = ""
        self.stderr_log = ""
        self.processed_log_txt = ""
        # pandas DataFrame once datafile_to_dataframe() has run
        self.dataframe = ""
        # path of the .csv written by datafile_to_dataframe()
        self.pdf_search_csv = ""

    def get_data(self):
        """Run pdfgrep and capture its stdout/stderr into log files.

        The output is saved to ``<outfile>-test-stdout.txt`` and
        ``<outfile>-test-stderr.txt`` so it can be inspected if something
        goes wrong.  ``self.result`` is set to "SUCCESS" when pdfgrep
        finishes within ``self.timeout`` seconds (its exit status is not
        checked) and to "TIMEOUT" otherwise.

        Returns:
            The path of the stdout log file.

        Raises:
            ValueError: if ``self.outfile`` is None (no file name can be built).
            OSError: if a log file cannot be opened or pdfgrep cannot be started.
        """
        if self.outfile is None:
            raise ValueError("outfile must be set to a base file name before calling get_data()")

        self.stdout_log_txt = self.outfile + "-{}-stdout.txt".format("test")
        self.stderr_log_txt = self.outfile + "-{}-stderr.txt".format("test")

        # TODO: have ability to pass in a specific command
        command = ['pdfgrep', '-r', '--include', 'ASA*.pdf', 'ASA End Date']

        # The context manager guarantees both logs are closed even when
        # Popen raises (e.g. pdfgrep is not installed).
        with open(self.stdout_log_txt, 'w+') as stdout_log, \
                open(self.stderr_log_txt, 'w+') as stderr_log:
            self.stdout_log = stdout_log
            self.stderr_log = stderr_log
            print("Names {} {}".format(stdout_log.name, stderr_log.name))
            print("running {}".format("pdfgrep -r --include 'ASA*.pdf' 'ASA End Date'"))

            process = subprocess.Popen(command, shell=False, stdout=stdout_log,
                                       stderr=stderr_log, universal_newlines=True)
            try:
                process.wait(timeout=int(self.timeout))
                self.result = "SUCCESS"
            except subprocess.TimeoutExpired:
                self.result = "TIMEOUT"
                process.terminate()
                # Reap the child so it does not linger as a zombie; escalate
                # to kill() if it ignores the terminate request.
                try:
                    process.wait(timeout=int(self.timeout))
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()

        return self.stdout_log_txt

    def preprocess_data(self):
        """Placeholder for future preprocessing of the captured data; does nothing."""
        pass

    @staticmethod
    def _read_skipping_bad_lines(path):
        """Read *path* as ':'-separated data, dropping lines with too many fields."""
        try:
            return pd.read_csv(path, sep=':', on_bad_lines='skip')
        except TypeError:
            # Older pandas releases do not know on_bad_lines and use the
            # error_bad_lines flag instead.
            return pd.read_csv(path, sep=':', error_bad_lines=False)

    # this method uses pandas dataframe - will use for data manipulation,
    # the data manipulation may be done in other manners
    def datafile_to_dataframe(self):
        """Parse the pdfgrep stdout log into a DataFrame and save it as .csv.

        The file named by ``self.stdout_log_txt`` is read with ':' as the
        separator.  If a line has more fields than expected (for example
        because the matched text itself contains "SN:"), the file is re-read
        with such lines skipped, so those lines are missing from the result.

        On return ``self.dataframe`` holds the parsed data and
        ``self.pdf_search_csv`` holds the path of the written .csv file
        (the stdout log path with its extension replaced by ``.csv``).

        Raises:
            pandas.errors.EmptyDataError: if the stdout log is empty.
        """
        try:
            self.dataframe = pd.read_csv(self.stdout_log_txt, sep=':')
        except pd.errors.ParserError:
            print("one of the files may have a SN: in it need to correct ")
            self.dataframe = self._read_skipping_bad_lines(self.stdout_log_txt)

        print("saving data to .csv")
        # swap the .txt extension for .csv
        self.pdf_search_csv = os.path.splitext(self.stdout_log_txt)[0] + ".csv"
        # to_csv returns None when given a path, so its result must not be
        # assigned back to self.pdf_search_csv.
        self.dataframe.to_csv(self.pdf_search_csv, mode='w', index=False)
 | 
						|
 | 
						|
 | 
						|
def main():
    """Command-line entry point: run the pdf search and export the result.

    Parses the command line, runs pdfgrep through ``lf_pdf_search.get_data()``,
    converts the captured output to .csv with ``datafile_to_dataframe()`` and
    prints the name of the raw output file.
    """
    # arguments
    parser = argparse.ArgumentParser(
        prog='lf_pdf_search.py',
        formatter_class=argparse.RawTextHelpFormatter,
        epilog='''\
            lf_pdf_search.py : for running scripts listed in lf_check_config.ini file
            ''',
        description='''\
lf_pdf_search.py
-----------

Summary :
---------
show renewas
            ''')

    parser.add_argument('--outfile', help="--outfile <Output Generic Name>  used as base name for all files generated", default="")
    # NOTE(review): --logfile is accepted but nothing below uses it yet.
    parser.add_argument('--logfile', help="--logfile <logfile Name>  logging for output of lf_pdf_search script", default="lf_pdf_search.log")

    args = parser.parse_args()

    pdf_search = lf_pdf_search()
    # Honour --outfile; when it is omitted (empty default) the class keeps
    # its built-in base name.
    if args.outfile:
        pdf_search.outfile = args.outfile

    output_file = pdf_search.get_data()

    pdf_search.datafile_to_dataframe()

    print("output file: {}".format(str(output_file)))
    print("END lf_pdf_search.py")
 | 
						|
 | 
						|
 | 
						|
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()