4 changes: 2 additions & 2 deletions README.md
@@ -9,8 +9,8 @@ Details will be updated soon.

Requirement:
======
-Python: 2.7
-PyTorch: 0.3.0
+Python: 3
+PyTorch: 0.3.1
(for PyTorch 0.3.1, please refer to [issue#8](https://github.com/jiesutd/LatticeLSTM/issues/8) for a slight modification.)

Input format:
80 changes: 40 additions & 40 deletions main.py
@@ -11,7 +11,7 @@
import copy
import torch
import gc
-import cPickle as pickle
+import pickle as pickle
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
@@ -99,21 +99,21 @@ def save_data_setting(data, save_file):
new_data.test_Ids = []
new_data.raw_Ids = []
## save data settings
-with open(save_file, 'w') as fp:
+with open(save_file, 'wb') as fp:
pickle.dump(new_data, fp)
print "Data setting saved to file: ", save_file
print("Data setting saved to file: ", save_file)


def load_data_setting(save_file):
-with open(save_file, 'r') as fp:
+with open(save_file, 'rb') as fp:
data = pickle.load(fp)
print "Data setting loaded from file: ", save_file
print("Data setting loaded from file: ", save_file)
data.show_data_summary()
return data
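For reference, a minimal sketch of the binary-mode round trip these hunks switch to: Python 3's pickle writes and reads bytes, so the text-mode handles ('w'/'r') that Python 2 tolerated now fail. A plain dict stands in for the Data object here:

import pickle

settings = {"word_alphabet_size": 4566, "HP_lr": 0.015}  # stand-in for the Data object
with open("demo.dset", "wb") as fp:  # text mode 'w' raises TypeError in Python 3
    pickle.dump(settings, fp)
with open("demo.dset", "rb") as fp:
    restored = pickle.load(fp)
assert restored == settings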

def lr_decay(optimizer, epoch, decay_rate, init_lr):
lr = init_lr * ((1-decay_rate)**epoch)
print " Learning rate is setted as:", lr
print(" Learning rate is setted as:", lr)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return optimizer
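A quick worked example of the exponential schedule above, with illustrative values (not necessarily the repository's defaults):

init_lr, decay_rate = 0.015, 0.05
for epoch in range(3):
    print(round(init_lr * ((1 - decay_rate) ** epoch), 7))
# 0.015, 0.01425, 0.0135375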
@@ -130,7 +130,7 @@ def evaluate(data, model, name):
elif name == 'raw':
instances = data.raw_Ids
else:
print "Error: wrong evaluate name,", name
print("Error: wrong evaluate name,", name)
right_token = 0
whole_token = 0
pred_results = []
@@ -182,7 +182,7 @@ def batchify_with_label(input_batch_list, gpu, volatile_flag=False):
chars = [sent[2] for sent in input_batch_list]
gazs = [sent[3] for sent in input_batch_list]
labels = [sent[4] for sent in input_batch_list]
-word_seq_lengths = torch.LongTensor(map(len, words))
+word_seq_lengths = torch.LongTensor(list(map(len, words)))
max_seq_len = word_seq_lengths.max()
word_seq_tensor = autograd.Variable(torch.zeros((batch_size, max_seq_len)), volatile = volatile_flag).long()
biword_seq_tensor = autograd.Variable(torch.zeros((batch_size, max_seq_len)), volatile = volatile_flag).long()
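The list(map(...)) wrapper in this hunk is needed because Python 3's map() returns a lazy iterator instead of a list, and the old-style tensor constructors used here expect a concrete sequence of lengths. A minimal sketch with toy sentences:

import torch

words = [[2, 5, 9], [7, 1]]                   # two sentences as word-id lists
lengths = list(map(len, words))               # [3, 2]; a bare map object would not do
word_seq_lengths = torch.LongTensor(lengths)  # tensor of per-sentence lengths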
@@ -202,8 +202,8 @@ def batchify_with_label(input_batch_list, gpu, volatile_flag=False):
### deal with char
# pad_chars (batch_size, max_seq_len)
pad_chars = [chars[idx] + [[0]] * (max_seq_len-len(chars[idx])) for idx in range(len(chars))]
-length_list = [map(len, pad_char) for pad_char in pad_chars]
-max_word_len = max(map(max, length_list))
+length_list = [list(map(len, pad_char)) for pad_char in pad_chars]
+max_word_len = max(list(map(max, length_list)))
char_seq_tensor = autograd.Variable(torch.zeros((batch_size, max_seq_len, max_word_len)), volatile = volatile_flag).long()
char_seq_lengths = torch.LongTensor(length_list)
for idx, (seq, seqlen) in enumerate(zip(pad_chars, char_seq_lengths)):
@@ -234,22 +234,22 @@ def batchify_with_label(input_batch_list, gpu, volatile_flag=False):


def train(data, save_model_dir, seg=True):
print "Training model..."
print("Training model...")
data.show_data_summary()
save_data_name = save_model_dir +".dset"
save_data_setting(data, save_data_name)
model = SeqModel(data)
print "finished built model."
print("finished built model.")
loss_function = nn.NLLLoss()
-parameters = filter(lambda p: p.requires_grad, model.parameters())
+parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
best_dev = -1
data.HP_iteration = 100
## start training
for idx in range(data.HP_iteration):
epoch_start = time.time()
temp_start = epoch_start
print("Epoch: %s/%s" %(idx,data.HP_iteration))
print(("Epoch: %s/%s" %(idx,data.HP_iteration)))
optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
instance_count = 0
sample_id = 0
@@ -290,7 +290,7 @@ def train(data, save_model_dir, seg=True):
temp_time = time.time()
temp_cost = temp_time - temp_start
temp_start = temp_time
print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
print((" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token)))
sys.stdout.flush()
sample_loss = 0
if end%data.HP_batch_size == 0:
@@ -300,10 +300,10 @@ def train(data, save_model_dir, seg=True):
batch_loss = 0
temp_time = time.time()
temp_cost = temp_time - temp_start
print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
print((" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token)))
epoch_finish = time.time()
epoch_cost = epoch_finish - epoch_start
print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss))
print(("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss)))
# exit(0)
# continue
speed, acc, p, r, f, _ = evaluate(data, model, "dev")
@@ -312,16 +312,16 @@

if seg:
current_score = f
print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(dev_cost, speed, acc, p, r, f))
print(("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(dev_cost, speed, acc, p, r, f)))
else:
current_score = acc
print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc))
print(("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc)))

if current_score > best_dev:
if seg:
print "Exceed previous best f score:", best_dev
print("Exceed previous best f score:", best_dev)
else:
print "Exceed previous best acc score:", best_dev
print("Exceed previous best acc score:", best_dev)
model_name = save_model_dir +'.'+ str(idx) + ".model"
torch.save(model.state_dict(), model_name)
best_dev = current_score
@@ -330,15 +330,15 @@
test_finish = time.time()
test_cost = test_finish - dev_finish
if seg:
print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f))
print(("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f)))
else:
print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))
print(("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc)))
gc.collect()


def load_model_decode(model_dir, data, name, gpu, seg=True):
data.HP_gpu = gpu
print "Load Model from file: ", model_dir
print("Load Model from file: ", model_dir)
model = SeqModel(data)
## load model need consider if the model trained in GPU and load in CPU, or vice versa
# if not gpu:
@@ -348,15 +348,15 @@ def load_model_decode(model_dir, data, name, gpu, seg=True):
model.load_state_dict(torch.load(model_dir))
# model = torch.load(model_dir)

print("Decode %s data ..."%(name))
print(("Decode %s data ..."%(name)))
start_time = time.time()
speed, acc, p, r, f, pred_results = evaluate(data, model, name)
end_time = time.time()
time_cost = end_time - start_time
if seg:
print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(name, time_cost, speed, acc, p, r, f))
print(("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(name, time_cost, speed, acc, p, r, f)))
else:
print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"%(name, time_cost, speed, acc))
print(("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"%(name, time_cost, speed, acc)))
return pred_results


@@ -401,20 +401,20 @@ def load_model_decode(model_dir, data, name, gpu, seg=True):
# char_emb = None
#bichar_emb = None

print "CuDNN:", torch.backends.cudnn.enabled
print("CuDNN:", torch.backends.cudnn.enabled)
# gpu = False
print "GPU available:", gpu
print "Status:", status
print "Seg: ", seg
print "Train file:", train_file
print "Dev file:", dev_file
print "Test file:", test_file
print "Raw file:", raw_file
print "Char emb:", char_emb
print "Bichar emb:", bichar_emb
print "Gaz file:",gaz_file
print("GPU available:", gpu)
print("Status:", status)
print("Seg: ", seg)
print("Train file:", train_file)
print("Dev file:", dev_file)
print("Test file:", test_file)
print("Raw file:", raw_file)
print("Char emb:", char_emb)
print("Bichar emb:", bichar_emb)
print("Gaz file:",gaz_file)
if status == 'train':
print "Model saved to:", save_model_dir
print("Model saved to:", save_model_dir)
sys.stdout.flush()

if status == 'train':
@@ -446,7 +446,7 @@ def load_model_decode(model_dir, data, name, gpu, seg=True):
decode_results = load_model_decode(model_dir, data, 'raw', gpu, seg)
data.write_decoded_results(output_file, decode_results, 'raw')
else:
print "Invalid argument! Please use valid arguments! (train/test/decode)"
print("Invalid argument! Please use valid arguments! (train/test/decode)")



10 changes: 5 additions & 5 deletions model/bilstm.py
@@ -10,14 +10,14 @@
import numpy as np
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# from kblayer import GazLayer
-from charbilstm import CharBiLSTM
-from charcnn import CharCNN
-from latticelstm import LatticeLSTM
+from .charbilstm import CharBiLSTM
+from .charcnn import CharCNN
+from .latticelstm import LatticeLSTM

class BiLSTM(nn.Module):
def __init__(self, data):
super(BiLSTM, self).__init__()
print "build batched bilstm..."
print("build batched bilstm...")
self.use_bigram = data.use_bigram
self.gpu = data.HP_gpu
self.use_char = data.HP_use_char
@@ -32,7 +32,7 @@ def __init__(self, data):
elif data.char_features == "LSTM":
self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
else:
print "Error char feature selection, please check parameter data.char_features (either CNN or LSTM)."
print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
exit(0)
self.embedding_dim = data.word_emb_dim
self.hidden_dim = data.HP_hidden_dim
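Python 3 removed Python 2's implicit relative imports, which is why the sibling modules above now need the explicit dot form. Assuming the package keeps its model/ directory name and the repository root is on sys.path, equivalent absolute imports would be:

from model.charbilstm import CharBiLSTM
from model.charcnn import CharCNN
from model.latticelstm import LatticeLSTM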
6 changes: 3 additions & 3 deletions model/bilstmcrf.py
@@ -9,13 +9,13 @@
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
-from bilstm import BiLSTM
-from crf import CRF
+from .bilstm import BiLSTM
+from .crf import CRF

class BiLSTM_CRF(nn.Module):
def __init__(self, data):
super(BiLSTM_CRF, self).__init__()
print "build batched lstmcrf..."
print("build batched lstmcrf...")
self.gpu = data.HP_gpu
## add two more label for downlayer lstm, use original label size for CRF
label_size = data.label_alphabet_size
2 changes: 1 addition & 1 deletion model/charbilstm.py
@@ -13,7 +13,7 @@
class CharBiLSTM(nn.Module):
def __init__(self, alphabet_size, embedding_dim, hidden_dim, dropout, gpu, bidirect_flag = True):
super(CharBiLSTM, self).__init__()
print "build batched char bilstm..."
print("build batched char bilstm...")
self.gpu = gpu
self.hidden_dim = hidden_dim
if bidirect_flag:
2 changes: 1 addition & 1 deletion model/charcnn.py
@@ -12,7 +12,7 @@
class CharCNN(nn.Module):
def __init__(self, alphabet_size, embedding_dim, hidden_dim, dropout, gpu):
super(CharCNN, self).__init__()
print "build batched char cnn..."
print("build batched char cnn...")
self.gpu = gpu
self.hidden_dim = hidden_dim
self.char_drop = nn.Dropout(dropout)
9 changes: 5 additions & 4 deletions model/crf.py
@@ -30,7 +30,7 @@ class CRF(nn.Module):

def __init__(self, tagset_size, gpu):
super(CRF, self).__init__()
print "build batched crf..."
print("build batched crf...")
self.gpu = gpu
# Matrix of transition parameters. Entry i,j is the score of transitioning *to* i *from* j.
self.average_batch = False
@@ -70,7 +70,7 @@ def _calculate_PZ(self, feats, mask):
scores = scores.view(seq_len, batch_size, tag_size, tag_size)
# build iter
seq_iter = enumerate(scores)
-_, inivalues = seq_iter.next() # bat_size * from_target_size * to_target_size
+_, inivalues = next(seq_iter) # bat_size * from_target_size * to_target_size
# only need start from start_tag
partition = inivalues[:, START_TAG, :].clone().view(batch_size, tag_size, 1) # bat_size * to_target_size
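The seq_iter.next() to next(seq_iter) change is the standard fix for Python 3, where iterators expose __next__ rather than next; the next() builtin works under both versions. A minimal sketch:

seq_iter = enumerate(["step0", "step1"])
idx, inivalues = next(seq_iter)  # (0, "step0"); seq_iter.next() would raise AttributeError in Python 3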

@@ -138,9 +138,10 @@ def _viterbi_decode(self, feats, mask):
## reverse mask (bug for mask = 1- mask, use this as alternative choice)
# mask = 1 + (-1)*mask
mask = (1 - mask.long()).byte()
-_, inivalues = seq_iter.next() # bat_size * from_target_size * to_target_size
+_, inivalues = next(seq_iter) # bat_size * from_target_size * to_target_size
# only need start from start_tag
-partition = inivalues[:, START_TAG, :].clone().view(batch_size, tag_size, 1) # bat_size * to_target_size
+# partition = inivalues[:, START_TAG, :].clone().view(batch_size, tag_size, 1) # bat_size * to_target_size
+partition = inivalues[:, START_TAG, :].clone().view(batch_size, tag_size) # bat_size * to_target_size
partition_history.append(partition)
# iter over last scores
for idx, cur_values in seq_iter:
6 changes: 3 additions & 3 deletions model/latticelstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,12 @@ class LatticeLSTM(nn.Module):
def __init__(self, input_dim, hidden_dim, word_drop, word_alphabet_size, word_emb_dim, pretrain_word_emb=None, left2right=True, fix_word_emb=True, gpu=True, use_bias = True):
super(LatticeLSTM, self).__init__()
skip_direction = "forward" if left2right else "backward"
print "build LatticeLSTM... ", skip_direction, ", Fix emb:", fix_word_emb, " gaz drop:", word_drop
print("build LatticeLSTM... ", skip_direction, ", Fix emb:", fix_word_emb, " gaz drop:", word_drop)
self.gpu = gpu
self.hidden_dim = hidden_dim
self.word_emb = nn.Embedding(word_alphabet_size, word_emb_dim)
if pretrain_word_emb is not None:
print "load pretrain word emb...", pretrain_word_emb.shape
print("load pretrain word emb...", pretrain_word_emb.shape)
self.word_emb.weight.data.copy_(torch.from_numpy(pretrain_word_emb))

else:
@@ -233,7 +233,7 @@ def forward(self, input, skip_input_list, hidden=None):
hx = hx.cuda()
cx = cx.cuda()

-id_list = range(seq_len)
+id_list = list(range(seq_len))
if not self.left2right:
id_list = list(reversed(id_list))
input_c_list = init_list_of_objects(seq_len)
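range() is likewise lazy in Python 3, so forward() materializes it as a list before optionally reversing it for the right-to-left pass. The pattern in isolation:

id_list = list(range(5))           # [0, 1, 2, 3, 4]
id_list = list(reversed(id_list))  # [4, 3, 2, 1, 0], the backward traversal order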
6 changes: 3 additions & 3 deletions utils/alphabet.py
@@ -70,12 +70,12 @@ def size(self):
return len(self.instances) + 1

def iteritems(self):
-return self.instance2index.iteritems()
+return iter(self.instance2index.items())

def enumerate_items(self, start=1):
if start < 1 or start >= self.size():
raise IndexError("Enumerate is allowed between [1 : size of the alphabet)")
-return zip(range(start, len(self.instances) + 1), self.instances[start - 1:])
+return list(zip(list(range(start, len(self.instances) + 1)), self.instances[start - 1:]))

def close(self):
self.keep_growing = False
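dict.iteritems() is gone in Python 3; items() returns a view object, and wrapping it in iter() preserves the old method's lazy-iterator contract. A minimal sketch with toy alphabet contents:

instance2index = {"PER": 1, "LOC": 2}
for instance, index in iter(instance2index.items()):
    print(instance, index)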
@@ -101,7 +101,7 @@ def save(self, output_directory, name=None):
try:
json.dump(self.get_content(), open(os.path.join(output_directory, saving_name + ".json"), 'w'))
except Exception as e:
print("Exception: Alphabet is not saved: " % repr(e))
print(("Exception: Alphabet is not saved: " % repr(e)))

def load(self, input_directory, name=None):
"""