Mirror of https://github.com/optim-enterprises-bv/nDPId.git, synced 2025-10-30 01:42:22 +00:00
Added Keras-based Autoencoder (Work-in-Progress!)

* minor fixes

Signed-off-by: Toni Uhlig <matzeton@googlemail.com>
.github/workflows/build-openwrt.yml (vendored, 2 changed lines)
@@ -14,7 +14,7 @@ on:
 
 jobs:
   build:
-    name: ${{ matrix.arch }} build
+    name: ${{ matrix.arch }} ${{ matrix.target }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
.github/workflows/build.yml (vendored, 2 changed lines)
@@ -14,7 +14,7 @@ on:
 
 jobs:
   test:
-    name: ${{ matrix.os }} ${{ matrix.gcrypt }}
+    name: ${{ matrix.os }} ${{ matrix.compiler }}
     runs-on: ${{ matrix.os }}
     env:
       CMAKE_C_COMPILER: ${{ matrix.compiler }}
dependencies/nDPIsrvd.h (vendored, 2 changed lines)
@@ -35,8 +35,6 @@
 #define nDPIsrvd_ARRAY_LENGTH(s) (sizeof(s) / sizeof(s[0]))
 #define nDPIsrvd_STRLEN_SZ(s) (sizeof(s) / sizeof(s[0]) - sizeof(s[0]))
 #define TOKEN_GET_SZ(sock, ...) nDPIsrvd_get_token(sock, __VA_ARGS__, NULL)
-#define TOKEN_GET_VALUE_SZ(sock, value_length, ...) \
-    nDPIsrvd_get_token_value(sock, TOKEN_GET_SZ(sock, __VA_ARGS__, NULL))
 #define TOKEN_VALUE_EQUALS(sock, token, string_to_check, string_to_check_length) \
     nDPIsrvd_token_value_equals(sock, token, string_to_check, string_to_check_length)
 #define TOKEN_VALUE_EQUALS_SZ(sock, token, string_to_check) \
@@ -41,6 +41,11 @@ Required by `tests/run_tests.sh`
 
 ## py-machine-learning
 
+Contains:
+
+1. Classification via Random Forests and scikit-learn
+2. Anomaly Detection via Autoencoder and Keras (Work-In-Progress!)
+
 Use sklearn together with CSVs created with **c-analysed** to train and predict DPI detections.
 
 Try it with: `./examples/py-machine-learning/sklearn_random_forest.py --csv ./ndpi-analysed.csv --proto-class tls.youtube --proto-class tls.github --proto-class tls.spotify --proto-class tls.facebook --proto-class tls.instagram --proto-class tls.doh_dot --proto-class quic --proto-class icmp`
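The autoencoder script added below only trains; the anomaly-detection step it works toward would flag packets whose reconstruction error is unusually high. A minimal sketch of that scoring step, assuming a model trained as in keras-autoencoder.py; the helper names, `pkt_vec` input, and threshold value are illustrative and not part of this commit:

# Sketch only: anomaly scoring with a trained autoencoder (not part of this commit).
# Assumes `autoencoder` was trained as in keras-autoencoder.py and `pkt_vec` is one
# normalized, padded packet vector of length input_size.
import numpy as np

def reconstruction_error(autoencoder, pkt_vec):
    # Reconstruct the packet and measure how badly the model reproduces it.
    batch = np.expand_dims(pkt_vec, axis=0)           # shape: (1, input_size)
    reconstructed = autoencoder.predict(batch, verbose=0)
    return float(np.mean(np.square(batch - reconstructed)))  # per-packet MSE

ANOMALY_THRESHOLD = 0.01  # illustrative; derive from a validation-set error percentile

def is_anomalous(autoencoder, pkt_vec):
    return reconstruction_error(autoencoder, pkt_vec) > ANOMALY_THRESHOLD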
examples/py-machine-learning/keras-autoencoder.py (new executable file, 125 lines)
#!/usr/bin/env python3

import base64
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import tensorflow as tf

from tensorflow.keras import preprocessing
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model

sys.path.append(os.path.dirname(sys.argv[0]) + '/../../dependencies')
sys.path.append(os.path.dirname(sys.argv[0]) + '/../share/nDPId')
sys.path.append(os.path.dirname(sys.argv[0]))
sys.path.append(sys.base_prefix + '/share/nDPId')
import nDPIsrvd
from nDPIsrvd import nDPIsrvdSocket

# Maximum packet payload length exported by nDPId; also the model's input width.
input_size = nDPIsrvd.nDPId_PACKETS_PLEN_MAX
training_size = 500  # packets collected before each training round
batch_size = 100


def generate_autoencoder():
    # Symmetric dense autoencoder (1024 -> ... -> 2 -> ... -> 1024) operating on
    # normalized packet payload vectors of length input_size.
    input_i = Input(shape=(input_size,), name='input_i')
    encoded_h1 = Dense(1024, activation='relu', name='encoded_h1')(input_i)
    encoded_h2 = Dense(512, activation='relu', name='encoded_h2')(encoded_h1)
    encoded_h3 = Dense(128, activation='relu', name='encoded_h3')(encoded_h2)
    encoded_h4 = Dense(64, activation='relu', name='encoded_h4')(encoded_h3)
    encoded_h5 = Dense(32, activation='relu', name='encoded_h5')(encoded_h4)
    latent = Dense(2, activation='relu', name='latent')(encoded_h5)
    decoder_h1 = Dense(32, activation='relu', name='decoder_h1')(latent)
    decoder_h2 = Dense(64, activation='relu', name='decoder_h2')(decoder_h1)
    decoder_h3 = Dense(128, activation='relu', name='decoder_h3')(decoder_h2)
    decoder_h4 = Dense(512, activation='relu', name='decoder_h4')(decoder_h3)
    decoder_h5 = Dense(1024, activation='relu', name='decoder_h5')(decoder_h4)
    output = Dense(input_size, activation='sigmoid', name='output')(decoder_h5)
    return input_i, Model(input_i, output)


def compile_autoencoder():
    inp, autoencoder = generate_autoencoder()
    autoencoder.compile(loss='mse', optimizer='adam', metrics=[tf.keras.metrics.Accuracy()])
    return inp, autoencoder


def onJsonLineRecvd(json_dict, instance, current_flow, global_user_data):
    if 'packet_event_name' not in json_dict:
        return True

    if json_dict['packet_event_name'] != 'packet' and \
        json_dict['packet_event_name'] != 'packet-flow':
        return True

    autoencoder, padded_pkts = global_user_data
    buf = base64.b64decode(json_dict['pkt'], validate=True)

    # Generate a decimal byte buffer with values from 0-255
    int_buf = [int(v) for v in buf]

    mat = np.array([int_buf])

    # Normalize the values
    mat = mat.astype('float32') / 255.

    # Per-packet mean removal
    mat -= np.mean(mat)

    # Pad the resulting matrix to the fixed model input width
    buf = preprocessing.sequence.pad_sequences(mat, padding="post", maxlen=input_size,
                                               truncating='post', dtype='float32')
    padded_pkts.append(buf[0])

    sys.stdout.write('.')
    sys.stdout.flush()
    if len(padded_pkts) % training_size == 0:
        print('\nGot {} packets, training..'.format(len(padded_pkts)))
        tmp = np.array(padded_pkts)
        history = autoencoder.fit(
            tmp, tmp, epochs=10, batch_size=batch_size,
            validation_split=0.2,
            shuffle=True
        )
        padded_pkts.clear()

        #plot_model(autoencoder, show_shapes=True, show_layer_names=True)
        #plt.plot(history.history['loss'])
        #plt.plot(history.history['val_loss'])
        #plt.title('model loss')
        #plt.xlabel('epoch')
        #plt.ylabel('loss')
        #plt.legend(['loss', 'val_loss'], loc='upper left')
        #plt.show()

    return True


if __name__ == '__main__':
    sys.stderr.write('\b\n***************\n')
    sys.stderr.write('*** WARNING ***\n')
    sys.stderr.write('***************\n')
    sys.stderr.write('\nThis is an immature Autoencoder example.\n')
    sys.stderr.write('Please do not rely on any of its output!\n\n')

    argparser = nDPIsrvd.defaultArgumentParser()
    args = argparser.parse_args()
    address = nDPIsrvd.validateAddress(args)

    sys.stderr.write('Recv buffer size: {}\n'.format(nDPIsrvd.NETWORK_BUFFER_MAX_SIZE))
    sys.stderr.write('Connecting to {} ..\n'.format(address[0] + ':' + str(address[1]) if type(address) is tuple else address))

    _, autoencoder = compile_autoencoder()

    nsock = nDPIsrvdSocket()
    nsock.connect(address)
    try:
        padded_pkts = list()
        nsock.loop(onJsonLineRecvd, None, (autoencoder, padded_pkts))
    except nDPIsrvd.SocketConnectionBroken as err:
        sys.stderr.write('\n{}\n'.format(err))
    except KeyboardInterrupt:
        print()
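As committed, the script retrains from scratch on every run and discards the model on exit. A hedged sketch of persisting weights between runs using standard Keras save/load calls; MODEL_PATH and load_or_compile are illustrative names, not part of this commit:

# Sketch only: persisting the trained model between runs (not part of this commit).
# The file name 'ndpid-autoencoder.keras' is illustrative.
import os
import tensorflow as tf

MODEL_PATH = 'ndpid-autoencoder.keras'

def load_or_compile():
    if os.path.exists(MODEL_PATH):
        # Restores architecture, weights and optimizer state.
        return tf.keras.models.load_model(MODEL_PATH)
    _, autoencoder = compile_autoencoder()  # from keras-autoencoder.py
    return autoencoder

# e.g. after each training round inside onJsonLineRecvd:
# autoencoder.save(MODEL_PATH)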
examples/py-schema-validation/requirements.txt (new file, 1 line)

jsonschema