@@ -0,0 +1,295 @@
+import tensorflow as tf
+import numpy as np
+
+from nltk.corpus import stopwords
+import re
+
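+# Contraction -> expanded form mapping used during text cleaning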
+contractions = {
+    "ain't": "am not",
+    "aren't": "are not",
+    "can't": "cannot",
+    "can't've": "cannot have",
+    "'cause": "because",
+    "could've": "could have",
+    "couldn't": "could not",
+    "couldn't've": "could not have",
+    "didn't": "did not",
+    "doesn't": "does not",
+    "don't": "do not",
+    "hadn't": "had not",
+    "hadn't've": "had not have",
+    "hasn't": "has not",
+    "haven't": "have not",
+    "he'd": "he would",
+    "he'd've": "he would have",
+    "he'll": "he will",
+    "he's": "he is",
+    "how'd": "how did",
+    "how'll": "how will",
+    "how's": "how is",
+    "i'd": "i would",
+    "i'll": "i will",
+    "i'm": "i am",
+    "i've": "i have",
+    "isn't": "is not",
+    "it'd": "it would",
+    "it'll": "it will",
+    "it's": "it is",
+    "let's": "let us",
+    "ma'am": "madam",
+    "mayn't": "may not",
+    "might've": "might have",
+    "mightn't": "might not",
+    "must've": "must have",
+    "mustn't": "must not",
+    "needn't": "need not",
+    "oughtn't": "ought not",
+    "shan't": "shall not",
+    "sha'n't": "shall not",
+    "she'd": "she would",
+    "she'll": "she will",
+    "she's": "she is",
+    "should've": "should have",
+    "shouldn't": "should not",
+    "that'd": "that would",
+    "that's": "that is",
+    "there'd": "there had",
+    "there's": "there is",
+    "they'd": "they would",
+    "they'll": "they will",
+    "they're": "they are",
+    "they've": "they have",
+    "wasn't": "was not",
+    "we'd": "we would",
+    "we'll": "we will",
+    "we're": "we are",
+    "we've": "we have",
+    "weren't": "were not",
+    "what'll": "what will",
+    "what're": "what are",
+    "what's": "what is",
+    "what've": "what have",
+    "where'd": "where did",
+    "where's": "where is",
+    "who'll": "who will",
+    "who's": "who is",
+    "won't": "will not",
+    "wouldn't": "would not",
+    "you'd": "you would",
+    "you'll": "you will",
+    "you're": "you are"
+}
+
+
+# text cleaning
+def clean_text(text, remove_stopwords=False):
+
+    # Convert words to lower case
+    text = text.lower()
+
+    # Replace contractions with their longer forms
+    text = text.split()
+    new_text = []
+    for word in text:
+        if word in contractions:
+            new_text.append(contractions[word])
+        else:
+            new_text.append(word)
+    text = " ".join(new_text)
+
+    # Format words and remove unwanted characters
+    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
+    text = re.sub(r'\<a href', ' ', text)
+    text = re.sub(r'&', '', text)
+    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
+    text = re.sub(r'<br />', ' ', text)
+    text = re.sub(r'\'', ' ', text)
+
+    # Optionally, remove stop words
+    if remove_stopwords:
+        text = text.split()
+        stops = set(stopwords.words("english"))
+        text = [w for w in text if w not in stops]
+        text = " ".join(text)
+
+    return text
+
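+# Build a word -> integer id mapping; <UNK> and <PAD> take the last two ids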
+def vocab_to_int_dict(word_count):
+    vocab_to_int = {}
+    value = 0
+
+    for word in word_count:
+        vocab_to_int[word] = value
+        value += 1
+    vocab_to_int["<UNK>"] = len(vocab_to_int)
+    vocab_to_int["<PAD>"] = len(vocab_to_int)
+
+    return vocab_to_int
+
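+# Build a (vocab_size x 300) embedding matrix from GloVe, with random vectors for out-of-vocabulary words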
+def embedd_into_matrix(vocab_to_int, embeddings_index):
+    embedding_dim = 300
+    word_embedding_matrix = np.zeros((len(vocab_to_int), embedding_dim), dtype=np.float32)
+    for word, i in vocab_to_int.items():
+        if word in embeddings_index:
+            word_embedding_matrix[i] = embeddings_index[word]
+        else:
+            # If the word is not in GloVe, create a random vector for it
+            new_embedding = np.array(np.random.uniform(-1.0, 1.0, embedding_dim), dtype=np.float32)
+            word_embedding_matrix[i] = new_embedding
+    if len(vocab_to_int) == len(word_embedding_matrix):
+        print("Embedding matrix rows match the vocabulary size.")
+
+    return word_embedding_matrix
+
+# Convert word tokens into their integer representations
+def convert_to_ints(text, vocab_to_int):
+    ints = []
+    for sentence in text:
+        sentence_ints = []
+        for word in sentence.split():
+            if word in vocab_to_int:
+                sentence_ints.append(vocab_to_int[word])
+            else:
+                sentence_ints.append(vocab_to_int["<UNK>"])
+        ints.append(sentence_ints)
+
+    return ints
+
+# Completes the text transformation: convert words into integers and pad to a fixed length
+def preprocess(clean_articles):
+    # word count
+    word_count = {}
+    for line in clean_articles:
+        for word in line.split():
+            if word not in word_count:
+                word_count[word] = 1
+            else:
+                word_count[word] += 1
+
+    vocab_to_int = vocab_to_int_dict(word_count)
+    int_articles = convert_to_ints(clean_articles, vocab_to_int)
+
+    # find the maximum sequence length
+    max_sequence_length = 0
+    for line in int_articles:
+        if len(line) > max_sequence_length:
+            max_sequence_length = len(line)
+    print("Max Sequence Length: ", max_sequence_length)
+    padded_articles = []
+    # pad all articles to the maximum length
+    for line in int_articles:
+        padding = [vocab_to_int["<PAD>"]] * (max_sequence_length - len(line))
+        padded_articles.append(line + padding)
+
+    return padded_articles, max_sequence_length, vocab_to_int
+
+# importing data
+articles = []
+with open("./train.title.txt") as f:
+    for line in f:
+        articles.append(line)
+
+articles = articles[:10000]
+
+# import the GloVe embeddings
+embeddings_index = {}
+with open("./glove.6B.300d.txt", encoding='utf-8') as f:
+    for line in f:
+        values = line.split()
+        word = values[0]
+        embedding = np.asarray(values[1:], dtype='float32')
+        embeddings_index[word] = embedding
+
+clean_articles = []
+for article in articles:
+    clean_articles.append(clean_text(article))
+
+padded_articles, max_sequence_length, vocab_to_int = preprocess(clean_articles)
+word_embedding_matrix = embedd_into_matrix(vocab_to_int, embeddings_index)
+
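+# Model hyperparameters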
+embedding_size = 300
+num_filters = 128
+batch_size = 20
+cell_size = 128
+num_features = 7
+lstm_keep_prob = 0.6
+
+# Build the graph
+# placeholders for inputs
+input_x = tf.placeholder(tf.int32, [None, max_sequence_length], name="input_x")
+input_num = tf.placeholder(tf.float32, [None, 100, num_features], name="input_num")
+keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+targets = tf.placeholder(tf.float32, [batch_size, 1], name="targets")
+
+
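+# Look up word embeddings for input_x and add a channel dimension so conv2d can be applied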
+with tf.name_scope("embedding"):
|
|
|
+ embed_input = tf.nn.embedding_lookup(word_embedding_matrix, input_x)
|
|
|
+ embed_input_expanded = tf.expand_dims(embed_input, -1)
|
|
|
+
|
|
|
+# convolution and max-pooling layer for each filter size
+pooled_outputs = []
+filter_sizes = [3, 4, 5]
+
+for filter_size in filter_sizes:
+    with tf.name_scope("conv-maxpool-{}".format(filter_size)):
+        # Convolution layer
+        filter_shape = [filter_size, embedding_size, 1, num_filters]
+        W = tf.Variable(tf.truncated_normal(shape=filter_shape, stddev=0.1), name="W")
+        b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
+        conv = tf.nn.conv2d(embed_input_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
+        # ReLU
+        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
+        # max-pooling over the full sequence
+        pooled = tf.nn.max_pool(h, ksize=[1, max_sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID", name="pool")
+        pooled_outputs.append(pooled)
+
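+# Each branch yields a [batch, 1, 1, num_filters] tensor after pooling over the whole sequence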
+# Combine all pooled outputs
+num_filters_total = num_filters * len(filter_sizes)
+hpool = tf.concat(pooled_outputs, 3)
+hpool_flat = tf.reshape(hpool, [-1, num_filters_total, 1])
+
+with tf.name_scope("dropout"):
+    h_drop = tf.nn.dropout(hpool_flat, keep_prob)
+
+# Recurrent layer over the pooled CNN features
+with tf.name_scope("rnn_textual") as rt:
+    cell = tf.contrib.rnn.LSTMCell(cell_size, initializer=tf.random_uniform_initializer(-0.1, 0.1))
+    cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=lstm_keep_prob)
+    initial_state = cell.zero_state(batch_size, dtype=tf.float32)
+    # feed the dropout output rather than the raw pooled features, so h_drop is actually used
+    rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell, h_drop, initial_state=initial_state, dtype=tf.float32, time_major=False, scope="rnn_textual")
+
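+# Second LSTM over the numeric feature sequence (input_num)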
+with tf.name_scope("rnn_numeric") as rn:
|
|
|
+ cell = tf.contrib.rnn.LSTMCell(cell_size, initializer=tf.random_uniform_initializer(-0.1, 0.1))
|
|
|
+ cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=lstm_keep_prob)
|
|
|
+ initial_state = cell.zero_state(batch_size, dtype=tf.float32)
|
|
|
+ rnn_outputs_num, rnn_states_num = tf.nn.dynamic_rnn(cell, input_num, initial_state=initial_state, dtype=tf.float32, scope="rnn_numeric", time_major=False)
|
|
|
+
|
|
|
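+# Concatenate the two RNN output sequences, flatten, apply dropout, and regress to a single value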
+dense_layer_input = tf.concat([rnn_outputs, rnn_outputs_num], 1)
+dense_layer_flat = tf.contrib.layers.flatten(dense_layer_input)
+dense_dropout = tf.nn.dropout(dense_layer_flat, keep_prob)
+final_output = tf.layers.dense(dense_dropout, 1, activation=tf.nn.relu, use_bias=True, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), trainable=True)
+print("Graph Done.")
+
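+# Mean-squared-error loss trained with plain gradient descent (learning rate 0.1)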
+loss = tf.losses.mean_squared_error(labels=targets, predictions=final_output)
+optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
+
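+# Build mini-batches of the padded articles; the training loop itself is still commented out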
+with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+    batches_x = []
+    print(final_output.shape)
+    # print(rnn_outputs_num.shape)
+    for i in range(len(padded_articles) // batch_size):
+        batches_x.append(padded_articles[i * batch_size : (i + 1) * batch_size])
+    # loss, _ = sess.run([loss, optimizer], feed_dict={})
+    # for i, batch_x in enumerate(batches_x):
+    #     # result = sess.run(hpool_flat, feed_dict={input_x: batch_x, keep_prob: 0.6})
+    #     print("iteration: {} done.".format(str(i)))
+    print("All done.")
+
+    writer = tf.summary.FileWriter("./my_graph", sess.graph)
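+    # A minimal training-loop sketch (assumption: numeric-feature batches `batches_num` and
+    # target batches `batches_y` would be built the same way as `batches_x`; neither exists
+    # above, so this stays commented out):
+    # for i, batch_x in enumerate(batches_x):
+    #     batch_loss, _ = sess.run([loss, optimizer],
+    #                              feed_dict={input_x: batch_x,
+    #                                         input_num: batches_num[i],
+    #                                         targets: batches_y[i],
+    #                                         keep_prob: 0.6})
+    #     print("iteration: {}, loss: {}".format(i, batch_loss))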