TF2.0 Text Preprocessing
================
by Jawad Haider

Text Preprocessing
# Install TensorFlow
# !pip install -q tensorflow-gpu==2.0.0-beta1
try:
%tensorflow_version 2.x # Colab only.
except Exception:
pass
import tensorflow as tf
print(tf.__version__)
`%tensorflow_version` only switches the major version: `1.x` or `2.x`.
You set: `2.x # Colab only.`. This will be interpreted as: `2.x`.
TensorFlow 2.x selected.
2.0.0-beta1
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Quick smoke test: tokenize a tiny hand-written corpus.

# Vocabulary cap handed to the tokenizer through `num_words`.
MAX_VOCAB_SIZE = 20000

sentences = [
    "I like eggs and ham.",
    "I love chocolate and bunnies.",
    "I hate onions.",
]

# Fit the tokenizer on the corpus, then encode every sentence as a list of
# integer word indices.
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
[[1, 3, 4, 2, 5], [1, 6, 7, 2, 8], [1, 9, 10]]
{'and': 2,
'bunnies': 8,
'chocolate': 7,
'eggs': 4,
'ham': 5,
'hate': 9,
'i': 1,
'like': 3,
'love': 6,
'onions': 10}
[[ 1 3 4 2 5]
[ 1 6 7 2 8]
[ 0 0 1 9 10]]
[[ 1 3 4 2 5]
[ 1 6 7 2 8]
[ 0 0 1 9 10]]
[[ 1 3 4 2 5]
[ 1 6 7 2 8]
[ 1 9 10 0 0]]
[[ 0 1 3 4 2 5]
[ 0 1 6 7 2 8]
[ 0 0 0 1 9 10]]
[[ 3 4 2 5]
[ 6 7 2 8]
[ 0 1 9 10]]
[[ 1 3 4 2]
[ 1 6 7 2]
[ 0 1 9 10]]