mirror of
				https://github.com/Telecominfraproject/wlan-lanforge-scripts.git
				synced 2025-10-31 18:58:01 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			146 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			146 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/python3
 | |
| 
 | |
| '''
 | |
| NAME:
 | |
| lf_pdf_search.py
 | |
| 
 | |
| PURPOSE:
 | |
| lf_pdf_search.py will run a pdf grep looking for specific information in pdf files 
 | |
| "pdfgrep -r --include 'ASA*.pdf' 'ASA End Date'" 
 | |
| 
 | |
| EXAMPLE:
 | |
| lf_pdf_search.py
 | |
| 
 | |
| NOTES:
 | |
| 1. copy lf_pdf_search.py to a directory that has the pdf information
 | |
| 
 | |
| TO DO NOTES:
 | |
| 
 | |
| 
 | |
| '''
 | |
| import datetime
 | |
| import pprint
 | |
| import sys
 | |
| if sys.version_info[0]  != 3:
 | |
|     print("This script requires Python3")
 | |
|     exit()
 | |
| 
 | |
| 
 | |
| import os
 | |
| import socket
 | |
| import logging
 | |
| import time
 | |
| from time import sleep
 | |
| import argparse
 | |
| import json
 | |
| import configparser
 | |
| import subprocess
 | |
| import csv
 | |
| import shutil
 | |
| import os.path
 | |
| import xlsxwriter
 | |
| import re
 | |
| import pandas as pd
 | |
| 
 | |
| 
 | |
| class lf_pdf_search():
 | |
|      def __init__(self):
 | |
| 
 | |
|           self.renewal_info = ""
 | |
|           self.timeout = 10
 | |
|           self.outfile = "pdf_search"
 | |
|           self.result = ""
 | |
|           self.stdout_log_txt = ""
 | |
|           self.stdout_log = ""
 | |
|           self.stderr_log_txt = ""
 | |
|           self.stderr_log = ""
 | |
|           self.processed_log_txt = ""
 | |
|           self.dataframe = ""
 | |
|           self.pdf_search_csv = ""
 | |
| 
 | |
|      def get_data(self):
 | |
| 
 | |
|           # o.k. a little over kill here ,  just save data to file to help debug if something goes wrong
 | |
|           if self.outfile is not None:
 | |
|                self.stdout_log_txt = self.outfile
 | |
|                self.stdout_log_txt = self.stdout_log_txt + "-{}-stdout.txt".format("test")
 | |
|                self.stdout_log = open(self.stdout_log_txt, 'w+')
 | |
|                self.stderr_log_txt = self.outfile
 | |
|                self.stderr_log_txt = self.stderr_log_txt + "-{}-stderr.txt".format("test")                    
 | |
|                #self.logger.info("stderr_log_txt: {}".format(stderr_log_txt))
 | |
|                self.stderr_log = open(self.stderr_log_txt, 'w+')
 | |
| 
 | |
|                print("Names {} {}".format(self.stdout_log.name, self.stderr_log.name))
 | |
| 
 | |
|           # have ability to pass in a specific command
 | |
|           command = "pdfgrep -r --include 'ASA*.pdf' 'ASA End Date'"
 | |
|           print("running {}".format(command))
 | |
| 
 | |
|           process = subprocess.Popen(['pdfgrep','-r','--include','ASA*.pdf','ASA End Date'], shell=False, stdout=self.stdout_log, stderr=self.stderr_log, universal_newlines=True)
 | |
|           try:
 | |
|                process.wait(timeout=int(self.timeout))
 | |
|                self.result = "SUCCESS"
 | |
|           except subprocess.TimeoutExpired:
 | |
|                process.terminate()
 | |
|                self.result = "TIMEOUT"
 | |
| 
 | |
|           self.stdout_log.close()
 | |
|           self.stderr_log.close()
 | |
| 
 | |
|           return self.stdout_log_txt
 | |
| 
 | |
|      def preprocess_data(self):
 | |
|           pass
 | |
| 
 | |
|      # this method uses pandas dataframe - will use for data manipulation, 
 | |
|      # the data mainupulation may be done in other manners
 | |
|      def datafile_to_dataframe(self):
 | |
|           # note the error_bad_lines=False will miss one of the lines 
 | |
|           delimiter_list = [':']
 | |
|           try:
 | |
|                self.dataframe = pd.read_csv(self.stdout_log_txt, delimiter = [':'])
 | |
|                #self.dataframe = pd.read_csv(self.stdout_log_txt, sep = ':')
 | |
|           except:
 | |
|                print("one of the files may have a SN: in it need to correct ")
 | |
|                self.dataframe = pd.read_csv(self.stdout_log_txt, delimiter = ':', error_bad_lines=False)
 | |
|           #print(self.dataframe)
 | |
|           print("saving data to .csv")
 | |
|           # this removes the extention of .txt
 | |
|           self.pdf_search_csv= self.stdout_log_txt[:-4]
 | |
|           self.pdf_search_csv = self.pdf_search_csv + ".csv"
 | |
|           self.pdf_search_csv = self.dataframe.to_csv(self.pdf_search_csv,mode='w',index=False)
 | |
| 
 | |
| 
 | |
| def main():
 | |
|     # arguments
 | |
|     parser = argparse.ArgumentParser(
 | |
|         prog='lf_pdf_search.py',
 | |
|         formatter_class=argparse.RawTextHelpFormatter,
 | |
|         epilog='''\
 | |
|             lf_pdf_search.py : for running scripts listed in lf_check_config.ini file
 | |
|             ''',
 | |
|         description='''\
 | |
| lf_pdf_search.py
 | |
| -----------
 | |
| 
 | |
| Summary :
 | |
| ---------
 | |
| show renewas
 | |
|             ''')
 | |
| 
 | |
|     parser.add_argument('--outfile', help="--outfile <Output Generic Name>  used as base name for all files generated", default="")
 | |
|     parser.add_argument('--logfile', help="--logfile <logfile Name>  logging for output of lf_pdf_search script", default="lf_pdf_search.log")
 | |
| 
 | |
|     args = parser.parse_args()    
 | |
| 
 | |
|     pdf_search = lf_pdf_search()
 | |
|     output_file = pdf_search.get_data()
 | |
| 
 | |
|     pdf_search.datafile_to_dataframe()
 | |
| 
 | |
|     print("output file: {}".format(str(output_file)))
 | |
|     print("END lf_pdf_search.py")
 | |
| 
 | |
| 
 | |
| if __name__ == "__main__":
 | |
|      main() | 
