wlan-lanforge-scripts/py-scripts/sandbox/lf_pdf_search.py

#!/usr/bin/python3

'''
NAME:
lf_pdf_search.py

PURPOSE:
lf_pdf_search.py runs pdfgrep recursively to look for specific information in pdf files, for example:
"pdfgrep -r --include 'ASA*.pdf' 'ASA End Date'"

EXAMPLE:
lf_pdf_search.py

NOTES:
1. copy lf_pdf_search.py to a directory that has the pdf information

TO DO NOTES:


'''
import datetime
import pprint
import sys
# Refuse to run under anything other than Python 3.
if sys.version_info[0] != 3:
    print("This script requires Python3")
    # sys.exit() is always available, whereas the bare exit() helper is only
    # installed by the site module; the non-zero status lets a calling
    # shell or script detect that nothing was run.
    sys.exit(1)


import os
import socket
import logging
import time
from time import sleep
import argparse
import json
import configparser
import subprocess
import csv
import shutil
import os.path
import xlsxwriter
import re
import pandas as pd


class lf_pdf_search():
    """Run ``pdfgrep`` and convert its colon-separated output to a CSV file.

    Typical use is ``get_data()`` followed by ``datafile_to_dataframe()``.

    Attributes that callers may set before calling ``get_data()``:
        outfile: base name used for every generated file.
        timeout: seconds to wait for the command before giving up.
        command: argument list handed to ``subprocess`` (run without a shell).
    """

    def __init__(self):
        self.renewal_info = ""
        self.timeout = 10
        self.outfile = "pdf_search"
        # argv of the search command; kept as a list so that no shell is
        # involved and the glob / phrase arguments need no quoting.
        self.command = ['pdfgrep', '-r', '--include', 'ASA*.pdf', 'ASA End Date']
        self.result = ""
        self.stdout_log_txt = ""
        self.stdout_log = ""
        self.stderr_log_txt = ""
        self.stderr_log = ""
        self.processed_log_txt = ""
        self.dataframe = ""
        self.pdf_search_csv = ""

    def get_data(self):
        """Run ``self.command`` and capture its stdout / stderr in text files.

        The files are named ``<outfile>-test-stdout.txt`` and
        ``<outfile>-test-stderr.txt``; keeping them on disk helps debugging
        if something goes wrong later.

        ``self.result`` is set to ``"SUCCESS"`` when the command finished
        within ``self.timeout`` seconds (its exit status is not inspected)
        and to ``"TIMEOUT"`` otherwise.

        Returns:
            The path of the stdout capture file.

        Raises:
            OSError: if the log files cannot be created or the command
                cannot be started (for example pdfgrep is not installed).
        """
        # Function-scope import: shlex is only needed to echo the command.
        import shlex

        # Fall back to the default base name so a None outfile cannot leave
        # the log handles unset.
        base_name = "pdf_search" if self.outfile is None else self.outfile
        self.stdout_log_txt = base_name + "-{}-stdout.txt".format("test")
        self.stderr_log_txt = base_name + "-{}-stderr.txt".format("test")

        # The context manager closes both files even when the command
        # cannot be started.
        with open(self.stdout_log_txt, 'w+') as stdout_log, \
                open(self.stderr_log_txt, 'w+') as stderr_log:
            self.stdout_log = stdout_log
            self.stderr_log = stderr_log
            print("Names {} {}".format(stdout_log.name, stderr_log.name))

            # Echo the command the way it would be typed in a shell.
            command = " ".join(shlex.quote(arg) for arg in self.command)
            print("running {}".format(command))

            try:
                # subprocess.run kills and reaps the child when the timeout
                # expires, so no zombie process is left behind.
                subprocess.run(self.command, shell=False,
                               stdout=stdout_log, stderr=stderr_log,
                               universal_newlines=True,
                               timeout=int(self.timeout))
                self.result = "SUCCESS"
            except subprocess.TimeoutExpired:
                self.result = "TIMEOUT"

        return self.stdout_log_txt

    def preprocess_data(self):
        """Placeholder for future clean-up of the captured data; does nothing."""
        pass

    @staticmethod
    def _read_skipping_bad_lines(path):
        """Read the ':'-separated file at *path*, dropping malformed rows."""
        try:
            return pd.read_csv(path, delimiter=':', on_bad_lines='skip')
        except TypeError:
            # Older pandas releases do not know on_bad_lines and only accept
            # the keyword that newer releases have removed.
            return pd.read_csv(path, delimiter=':', error_bad_lines=False)

    # this method uses pandas dataframe - will use for data manipulation,
    # the data manipulation may be done in other manners
    def datafile_to_dataframe(self):
        """Load the stdout capture into ``self.dataframe`` and save it as CSV.

        The capture is parsed as ':'-separated text.  Rows with a different
        number of fields than the first row (for example a matched line that
        itself contains ``SN:``) are skipped.  An empty capture yields an
        empty dataframe.  The CSV is written next to the capture file with
        the extension replaced by ``.csv`` and its path is stored in
        ``self.pdf_search_csv``.

        NOTE(review): read_csv treats the first line of the capture as the
        header row, as the original code did - confirm that is intended.
        """
        try:
            self.dataframe = pd.read_csv(self.stdout_log_txt, delimiter=':')
        except pd.errors.EmptyDataError:
            print("no data found in {}".format(self.stdout_log_txt))
            self.dataframe = pd.DataFrame()
        except pd.errors.ParserError:
            print("one of the files may have a SN: in it need to correct ")
            self.dataframe = self._read_skipping_bad_lines(self.stdout_log_txt)

        print("saving data to .csv")
        # Swap the .txt extension for .csv and remember the path itself;
        # to_csv() returns None when it is given a path.
        self.pdf_search_csv = os.path.splitext(self.stdout_log_txt)[0] + ".csv"
        self.dataframe.to_csv(self.pdf_search_csv, mode='w', index=False)


def main():
    """Command-line entry point: run the pdf search and export the result.

    Parses the command line, runs the search with ``lf_pdf_search.get_data()``,
    converts the captured output to CSV with
    ``lf_pdf_search.datafile_to_dataframe()`` and prints the name of the
    capture file.
    """
    # arguments
    parser = argparse.ArgumentParser(
        prog='lf_pdf_search.py',
        formatter_class=argparse.RawTextHelpFormatter,
        epilog='''\
            lf_pdf_search.py : for running scripts listed in lf_check_config.ini file
            ''',
        description='''\
lf_pdf_search.py
-----------

Summary :
---------
show renewas
            ''')

    parser.add_argument('--outfile', help="--outfile <Output Generic Name>  used as base name for all files generated", default="")
    # NOTE(review): --logfile is accepted but nothing in this function uses it.
    parser.add_argument('--logfile', help="--logfile <logfile Name>  logging for output of lf_pdf_search script", default="lf_pdf_search.log")

    args = parser.parse_args()

    pdf_search = lf_pdf_search()
    # Honour --outfile when one was given; the empty default leaves the
    # base name chosen by lf_pdf_search itself untouched.
    if args.outfile:
        pdf_search.outfile = args.outfile

    output_file = pdf_search.get_data()

    pdf_search.datafile_to_dataframe()

    print("output file: {}".format(str(output_file)))
    print("END lf_pdf_search.py")

# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
     main()