priyesh17 committed on
Commit
d34f465
·
verified ·
1 Parent(s): 325e144

path update

Browse files
Files changed (1) hide show
  1. util.py +69 -69
util.py CHANGED
@@ -1,70 +1,70 @@
1
- import tensorflow as tf
2
- from tensorflow.keras.applications.vgg16 import preprocess_input
3
- from tensorflow.keras.preprocessing.image import img_to_array
4
- from tensorflow.keras.preprocessing.sequence import pad_sequences
5
- import numpy as np
6
- import pickle
7
-
8
- CapGenerator = tf.keras.models.load_model('models/CapGen.h5')
9
- VGGMod = tf.keras.models.load_model('models/VGGModel.h5')
10
- max_length = 35
11
-
12
- with open('models/tokenizer.pickle', 'rb') as handle:
13
- tokenizer = pickle.load(handle)
14
-
15
- vocab_size = len(tokenizer.word_index) + 1
16
-
17
- def idx_to_word(integer, tokenizer):
18
- for word, index in tokenizer.word_index.items():
19
- if index == integer:
20
- return word
21
- return None
22
-
23
- def predict_caption(model, image, tokenizer, max_length=max_length):
24
- # add start tag for generation process
25
- in_text = 'startseq'
26
- # iterate over the max length of sequence
27
- for i in range(max_length):
28
- # encode input sequence
29
- sequence = tokenizer.texts_to_sequences([in_text])[0]
30
- # pad the sequence
31
- sequence = pad_sequences([sequence], max_length)
32
- # predict next word
33
- yhat = model.predict([image, sequence], verbose=0)
34
- # get index with high probability
35
- yhat = np.argmax(yhat)
36
- # convert index to word
37
- word = idx_to_word(yhat, tokenizer)
38
- # stop if word not found
39
- if word is None:
40
- break
41
- # append word as input for generating next word
42
- in_text += " " + word
43
- # stop if we reach end tag
44
- if word == 'endseq':
45
- break
46
-
47
- return in_text
48
-
49
- def feature_extractor(image):
50
-
51
- # Img to np array
52
- image = img_to_array(image)
53
-
54
- # Reshaping
55
- image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
56
-
57
- # Preprocessing for passing through VGG16
58
- image = preprocess_input(image)
59
-
60
- feature = VGGMod.predict(image, verbose=0)
61
-
62
- return feature
63
-
64
- def generate_caption(image_name):
65
-
66
- y_pred = predict_caption(CapGenerator, feature_extractor(image_name), tokenizer, max_length)
67
- y_pred = y_pred[8:-7].upper()
68
-
69
- return y_pred
70
 
 
1
+ import tensorflow as tf
2
+ from tensorflow.keras.applications.vgg16 import preprocess_input
3
+ from tensorflow.keras.preprocessing.image import img_to_array
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
+ import numpy as np
6
+ import pickle
7
+
8
# Module-level setup: everything below runs once, at import time.
# All paths are relative, so they resolve against the current working directory.
# NOTE(review): the two model files are loaded from the working directory while
# the tokenizer is still read from 'models/' -- confirm the tokenizer file was
# not moved along with the models.

# Captioning model; passed to predict_caption() by generate_caption().
CapGenerator = tf.keras.models.load_model('CapGen.h5')
# Model used by feature_extractor() to turn an image into features.
VGGMod = tf.keras.models.load_model('VGGModel.h5')
# Used both as the padded sequence length and as the cap on decoding steps.
max_length = 35

# SECURITY: pickle.load can execute arbitrary code while unpickling; only ever
# point this at a tokenizer file that ships with the application.
with open('models/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Number of entries in the tokenizer's word index plus one.
# NOTE(review): presumably the +1 accounts for a reserved index 0; this value is
# not referenced by the functions visible in this file -- confirm it is needed.
vocab_size = len(tokenizer.word_index) + 1
16
+
17
def idx_to_word(integer, tokenizer):
    """Return the word that ``tokenizer`` maps to ``integer``, or ``None``.

    Args:
        integer: Vocabulary index to look up.
        tokenizer: Object exposing ``word_index``, a mapping of word -> index.
            If it also exposes a populated inverse ``index_word`` mapping
            (as the Keras ``Tokenizer`` does), that mapping is tried first.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: one hash lookup instead of scanning the whole vocabulary for
    # every generated word.
    index_word = getattr(tokenizer, 'index_word', None)
    if index_word:
        word = index_word.get(integer)
        if word is not None:
            return word
    # Fallback keeps the original semantics for tokenizers without an inverse
    # mapping, and for indices the inverse mapping does not contain.
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
22
+
23
def predict_caption(model, image, tokenizer, max_length=max_length):
    """Greedily decode a caption, one word per step.

    Starting from the text 'startseq', each step encodes the text so far with
    ``tokenizer``, pads it to ``max_length``, asks ``model`` for the next-word
    scores and appends the highest-scoring word.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            the scores for the next word.
        image: First model input, passed through to ``model.predict`` as-is.
        tokenizer: Provides ``texts_to_sequences`` and the vocabulary used by
            ``idx_to_word``.
        max_length: Padded sequence length and maximum number of decoding
            steps.

    Returns:
        The decoded text, still including the leading 'startseq' and, when
        decoding finished on it, the trailing 'endseq'.
    """
    caption = 'startseq'
    for _ in range(max_length):
        # Encode the caption so far and pad it to the fixed model input length.
        encoded = tokenizer.texts_to_sequences([caption])[0]
        padded = pad_sequences([encoded], max_length)

        # Pick the highest-scoring vocabulary index and map it back to a word.
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_to_word(np.argmax(scores), tokenizer)

        # An index with no vocabulary entry ends decoding.
        if next_word is None:
            break

        caption = caption + " " + next_word

        # The end tag is kept in the returned text and ends decoding.
        if next_word == 'endseq':
            break

    return caption
48
+
49
def feature_extractor(image):
    """Run ``image`` through ``VGGMod`` and return the predicted features.

    Args:
        image: Anything ``img_to_array`` accepts.
            NOTE(review): no resizing happens here, so the image presumably
            must already match VGGMod's expected input size -- confirm with
            the caller.

    Returns:
        The output of ``VGGMod.predict`` for a batch containing this one image.
    """
    pixels = img_to_array(image)

    # Add a leading batch axis of size 1 in front of the existing three axes.
    batch = pixels.reshape((1, *pixels.shape[:3]))

    # Apply the VGG16 input preprocessing before prediction.
    batch = preprocess_input(batch)

    return VGGMod.predict(batch, verbose=0)
63
+
64
def generate_caption(image_name):
    """Generate an upper-cased caption for an image.

    Args:
        image_name: Passed straight to ``feature_extractor``.
            NOTE(review): despite the name, this presumably has to be an image
            object rather than a file name, since it ends up in
            ``img_to_array`` -- confirm with the caller.

    Returns:
        The decoded caption in upper case with the 'startseq' tag removed and
        the ' endseq' tag removed when present. The space that followed
        'startseq' is kept, so the result for a normally terminated caption
        is unchanged.
    """
    start_tag = 'startseq'
    end_tag = ' endseq'

    y_pred = predict_caption(CapGenerator, feature_extractor(image_name), tokenizer, max_length)

    # predict_caption always begins its output with the start tag.
    y_pred = y_pred[len(start_tag):]

    # Decoding can stop without emitting the end tag (length cap reached or an
    # index with no vocabulary entry). Only strip the tag when it is really
    # there, instead of always cutting the last 7 characters of the caption.
    if y_pred.endswith(end_tag):
        y_pred = y_pred[:-len(end_tag)]

    return y_pred.upper()
70