TFLite Conversion (trained Keras model → fully int8-quantized .tflite flatbuffer)

import numpy as np
import tensorflow as tf

# `model` is assumed to be an already-built (trained) Keras model.
# Compile it so the saved artifact carries optimizer/loss/metric config.
model.compile(
    metrics=['accuracy'],
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
)

# Persist the full model in the legacy HDF5 single-file format.
model.save('my_model.h5', save_format='h5')

# Create a TFLite converter directly from the in-memory Keras model and
# turn on the default optimization set (enables quantization).
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# for compatibility with microcontrollers, enforce full integer quantization
def representative_dataset_gen(num_samples=100):
  """Yield calibration batches for full-integer quantization.

  The converter runs these samples through the float model to measure
  activation ranges, which determine the int8 scale/zero-point values.

  Args:
    num_samples: How many calibration samples to yield. Defaults to 100,
      preserving the original behavior.

  Yields:
    A single-element list holding one float32 array of shape
    (1, len(sensor_columns)) — the input signature the converter expects.

  NOTE(review): random uniform data only roughly calibrates the
  quantization ranges; feeding real samples from the training set would
  give noticeably better int8 accuracy. TODO: confirm with real data.
  """
  for _ in range(num_samples):
    # assumes `sensor_columns` (defined elsewhere in the file) gives the
    # number of input features — verify against the model's input layer
    yield [np.random.random((1, len(sensor_columns))).astype(np.float32)]

# Wire up the calibration data source, then restrict the converter to the
# int8 builtin kernel set so the result runs on integer-only hardware
# (e.g. microcontrollers without an FPU).
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Quantize the model's I/O tensors as well, so callers feed/read raw int8.
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Run the conversion; the result is the serialized flatbuffer as bytes.
tflite_model = converter.convert()

# Persist the flatbuffer so it can be loaded by an interpreter later.
with open('my_model.tflite', 'wb') as out_file:
  out_file.write(tflite_model)

Confirm that the model converted correctly by loading it into the TFLite interpreter and running a single test inference:

# Load the TFLite model in TFLite Interpreter
# Load the converted flatbuffer into the TFLite interpreter and allocate
# its tensor buffers.
interpreter = tf.lite.Interpreter(model_path="my_model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Build a random test input matching the model's input tensor shape.
# BUG FIX: the original code cast uniform floats in [0, 1) to int8, which
# truncates almost every value to 0 — a degenerate smoke-test input for an
# int8-quantized model. Draw values spanning the full int8 range instead.
input_shape = input_details[0]['shape']
input_data = np.random.randint(-128, 128, size=input_shape, dtype=np.int8)
interpreter.set_tensor(input_details[0]['index'], input_data)

# Run one inference pass.
interpreter.invoke()

# Read back the output tensor (int8, per inference_output_type above) and
# show it as a basic sanity check that the model executes end to end.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)