diff --git a/README.md b/README.md
index 409faef..cccad3c 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,25 @@
 # generating-reviews-discovering-sentiment
-Code for "Learning to Generate Reviews and Discovering Sentiment"
-This is where we'll be putting code related to the paper [Learning to Generate Reviews and Discovering Sentiment](https://arxiv.org/abs/1704.01444) (Alec Radford, Rafal Jozefowicz, Ilya Sutskever).
+This is a fork of the code related to the paper [Learning to Generate Reviews and Discovering Sentiment](https://arxiv.org/abs/1704.01444) (Alec Radford, Rafal Jozefowicz, Ilya Sutskever).
 
-Right now the code supports using the language model as feature extractor.
+It supports two major use cases.
+## Use the language model to generate reviews:
+```
+from encoder import Model
+mdl = Model()
+
+review_completed = mdl.generate_sequence("I couldn’t figure out", override={2388: 1.0})
+```
+Optional arguments to `generate_sequence` shape the length, tone and variance of the completed review. For example, the parameters above complete the phrase with a positive sentiment by fixing the [infamous neuron 2388](https://blog.openai.com/unsupervised-sentiment-neuron/#sentimentneuron) to 1.0.
+
+Stochastic and/or argmax sampling is used to vary the completed phrase. For an explanation of all parameters refer to the docstring; for example usage refer to `test_generative.py`.
+
+Some reviews that have been generated:
+![alt text](https://raw.githubusercontent.com/ahirner/generating-reviews-discovering-sentiment/master/samples_sentiment.jpeg)
+
+
+## Use the language model as feature extractor:
 ```
 from encoder import Model
diff --git a/encoder.py b/encoder.py
index b28c68b..9beec8c 100644
--- a/encoder.py
+++ b/encoder.py
@@ -65,7 +65,7 @@ def mlstm(inputs, c, h, M, ndim, scope='lstm', wn=False):
     for idx, x in enumerate(inputs):
         m = tf.matmul(x, wmx)*tf.matmul(h, wmh)
         z = tf.matmul(x, wx) + tf.matmul(m, wh) + b
-        i, f, o, u = tf.split(1, 4, z)
+        i, f, o, u = tf.split(z, 4, 1)
         i = tf.nn.sigmoid(i)
         f = tf.nn.sigmoid(f)
         o = tf.nn.sigmoid(o)
@@ -81,22 +81,22 @@ def mlstm(inputs, c, h, M, ndim, scope='lstm', wn=False):
         h = o*tf.tanh(c)
         inputs[idx] = h
         cs.append(c)
-    cs = tf.pack(cs)
+    cs = tf.stack(cs)
     return inputs, cs, c, h
 
 
 def model(X, S, M=None, reuse=False):
     nsteps = X.get_shape()[1]
-    cstart, hstart = tf.unpack(S, num=hps.nstates)
+    cstart, hstart = tf.unstack(S, num=hps.nstates)
     with tf.variable_scope('model', reuse=reuse):
         words = embd(X, hps.nembd)
-        inputs = [tf.squeeze(v, [1]) for v in tf.split(1, nsteps, words)]
+        inputs = tf.unstack(words, nsteps, 1)
         hs, cells, cfinal, hfinal = mlstm(
             inputs, cstart, hstart, M, hps.nhidden, scope='rnn', wn=hps.rnn_wn)
-        hs = tf.reshape(tf.concat(1, hs), [-1, hps.nhidden])
+        hs = tf.reshape(tf.concat(hs, 1), [-1, hps.nhidden])
         logits = fc(
             hs, hps.nvocab, act=lambda x: x, wn=hps.out_wn, scope='out')
-        states = tf.pack([cfinal, hfinal], 0)
+        states = tf.stack([cfinal, hfinal], 0)
         return cells, states, logits
@@ -143,7 +143,7 @@ def __init__(self, nbatch=128, nsteps=64):
         cells, states, logits = model(X, S, M, reuse=False)
 
         sess = tf.Session()
-        tf.initialize_all_variables().run(session=sess)
+        tf.global_variables_initializer().run(session=sess)
 
         def seq_rep(xmb, mmb, smb):
             return sess.run(states, {X: xmb, M: mmb, S: smb})
@@ -200,9 +200,74 @@ def cell_transform(xs, indexes=None):
                 Fs.append(smb)
             Fs = np.concatenate(Fs, axis=1).transpose(1, 0, 2)
             return Fs
+
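+        # Priming vs. decoding: generate_sequence first feeds the seed string
+        # through the model in chunks of up to nsteps characters to build up
+        # the hidden state, then decodes one character per step, re-applying
+        # any `override` values to the state before each session run.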
"""Continue a given sequence. + Args: + x_start (string): The string to be continued. + override (dict): Values of the hidden state to override + with keys of the dictionary as index. + sampling (int): 0 greedy argmax, 2 weighted random from probabilty + distribution, 1 weighted but only once after each word. + len_add (int, string, None): + If int, the number of characters to be added. + If string, returns after each contained character was seen once. + + Returns: + The completed string including transformation and paddings from preprocessing. + + Example: + generate_sequence("I couldn’t figure out", override= {2388 : 1.0}) + """ + + len_start = len(x_start) + x = bytearray(preprocess(x_start)) + + string_terminate = isinstance(len_add, str) + len_end = (-1 if string_terminate else (len_start + len_add)) + + ndone = 0 + last_chr = chr(x[-1]) + smb = np.zeros((2, 1, hps.nhidden)) + + while True if string_terminate else ndone <= len_end: + xsubseq = x[ndone:ndone+nsteps] + ndone += len(xsubseq) + xmb, mmb = batch_pad([xsubseq], 1, nsteps) + + #Override salient neurons + for neuron, value in override.items(): + smb[:, :, neuron] = value + + if ndone <= len_start: + #Prime hidden state with full steps + smb = sess.run(states, {X: xmb, S: smb, M: mmb}) + else: + #Incrementally add characters + outs, smb = sess.run([logits, states], {X: xmb, S: smb, M: mmb}) + out = outs[-1] + + #Do uniform weighted sampling always or only after ' ' + if (sampling == 1 and last_chr == ' ') or sampling == 2: + squashed = np.exp(out) / np.sum(np.exp(out), axis=0) + last_chr = chr(np.random.choice(len(squashed), p=squashed)) + else: + last_chr = chr(np.argmax(out)) + + x.append(ord(last_chr)) + + if string_terminate and (last_chr in len_add): + len_add = len_add.replace(last_chr, "", 1) + if len(len_add) == 0: + break + + return(x.decode()) + + self.transform = transform self.cell_transform = cell_transform + self.generate_sequence = generate_sequence if __name__ == '__main__': diff --git a/samples_sentiment.jpeg b/samples_sentiment.jpeg new file mode 100644 index 0000000..0e6e55a Binary files /dev/null and b/samples_sentiment.jpeg differ diff --git a/test_generative.py b/test_generative.py new file mode 100644 index 0000000..a7511bb --- /dev/null +++ b/test_generative.py @@ -0,0 +1,36 @@ +from encoder import Model +mdl = Model() + +base = "I couldn’t figure out" +print("\'%s\'... --> (argmax sampling):" % base) + +#Overriden values are slightly on the extreme on either end of +#the sentiment's activation distribution +positive = mdl.generate_sequence(base, override={2388 : 1.0}) +print("Positive sentiment (1 sentence): " + positive) +negative = mdl.generate_sequence(base, override={2388 : -1.5}, len_add = 100) +print("\nNegative sentiment (+100 chars):" + negative + '...') + + +n = 3 +print("\n\n\'%s\'... --> (weighted samples after each word):" % base) + +print("Positive sentiment (%d examples, 2 sentences each):" %n) +for i in range(n): + positive = mdl.generate_sequence(base, override={2388 : 1.0}, len_add = '..', sampling = 1) + print("(%d)%s" % (i, positive[1:])) + +print("\nNegative sentiment (%d examples, 2 sentences each):" %n) +for i in range(n): + positive = mdl.generate_sequence(base, override={2388 : -1.5}, len_add = '..', sampling = 1) + print("(%d)%s" % (i, positive[1:])) + + +print("\n\n\'%s\'... 
+from encoder import Model
+mdl = Model()
+
+base = "I couldn’t figure out"
+print("'%s'... --> (argmax sampling):" % base)
+
+# Overridden values sit slightly toward either extreme of the sentiment
+# unit's activation distribution
+positive = mdl.generate_sequence(base, override={2388: 1.0})
+print("Positive sentiment (1 sentence): " + positive)
+negative = mdl.generate_sequence(base, override={2388: -1.5}, len_add=100)
+print("\nNegative sentiment (+100 chars):" + negative + '...')
+
+
+n = 3
+print("\n\n'%s'... --> (weighted samples after each word):" % base)
+
+print("Positive sentiment (%d examples, 2 sentences each):" % n)
+for i in range(n):
+    positive = mdl.generate_sequence(base, override={2388: 1.0}, len_add='..', sampling=1)
+    print("(%d)%s" % (i, positive[1:]))
+
+print("\nNegative sentiment (%d examples, 2 sentences each):" % n)
+for i in range(n):
+    negative = mdl.generate_sequence(base, override={2388: -1.5}, len_add='..', sampling=1)
+    print("(%d)%s" % (i, negative[1:]))
+
+
+print("\n\n'%s'... --> (weighted samples after each character):" % base)
+
+neutral = mdl.generate_sequence(base, len_add='...', sampling=2)
+print("Sentiment not influenced (3 sentences):" + neutral)
+neutral = mdl.generate_sequence(base, override={2388: 0.0}, len_add='...', sampling=2)
+print("\nSentiment fixed to 0 (3 sentences):" + neutral)
+negative = mdl.generate_sequence(base, override={2388: -1.0}, len_add='...', sampling=2)
+print("\nSlightly negative sentiment (3 sentences):" + negative)
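+
+# Note: with sampling=1 or sampling=2 the completions differ between runs,
+# since np.random.choice is not seeded here.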