First commit
commit b7e1e0fa0f
98 changed files with 42749 additions and 0 deletions
5  .gitignore  vendored  Normal file
@@ -0,0 +1,5 @@
*.pyc
__pycache__/
.eggs/
*.egg-info/
.cache
6  pytorch/.gitignore  vendored  Normal file
@@ -0,0 +1,6 @@
*.pyc
__pycache__/
.eggs/
*.egg-info/
.cache
data/
0  pytorch/CIFAR10/benchmark/__init__.py  Normal file
140  pytorch/CIFAR10/benchmark/infer.py  Normal file
@@ -0,0 +1,140 @@
import os
import timeit
from glob import glob
from collections import OrderedDict

import click
import torch
import numpy as np
from torch.autograd import Variable
from torchvision import transforms
from torchvision import datasets

from benchmark.train import load, MEAN, STD, save_result, MODELS


class PyTorchEngine:
    def __init__(self, filename, use_cuda=False, name=None):
        self.filename = filename
        self.use_cuda = use_cuda
        self.name = name
        model, epoch, accuracy = load(self.filename)

        if self.use_cuda:
            self.model = model.cuda()
        else:
            self.model = model.cpu()
        self.epoch = epoch
        self.accuracy = accuracy

    def pred(self, inputs):
        inputs = Variable(inputs, requires_grad=False, volatile=True)

        if self.use_cuda:
            inputs = inputs.cuda()
            return self.model(inputs).data.cpu().numpy()
        else:
            return self.model(inputs).data.numpy()


def time_batch_size(dataset, batch_size, pred, use_cuda, repeat=100, bestof=3):
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=False, pin_memory=use_cuda)
    inputs, targets = loader.__iter__().next()
    assert inputs.size(0) == batch_size

    times = timeit.repeat('pred(inputs)', globals=locals(),
                          repeat=repeat, number=1)

    return times


def infer_cifar10(dataset, engine, start=1, end=128, repeat=100, log2=True,
                  output=None):
    if log2:
        start = int(np.floor(np.log2(start)))
        end = int(np.ceil(np.log2(end)))
        assert start >= 0
        assert end >= start
        batch_sizes = map(lambda x: 2**x, range(start, end + 1))
    else:
        batch_sizes = range(start, end + 1)
    results = []
    for batch_size in batch_sizes:
        times = time_batch_size(dataset, batch_size, engine.pred,
                                engine.use_cuda, repeat=repeat)

        result = OrderedDict()
        result['nodename'] = os.uname().nodename
        result['model'] = engine.name
        result['use_cuda'] = engine.use_cuda
        result['batch_size'] = batch_size
        result['mean'] = np.mean(times)
        result['std'] = np.std(times)
        result['throughput'] = batch_size / np.mean(times)
        result['filename'] = engine.filename
        if output is not None:
            save_result(result, output)

        print('batch_size: {batch_size:4d}'
              ' - mean: {mean:.4f}'
              ' - std: {std:.4f}'
              ' - throughput: {throughput:.4f}'.format(**result))
        results.append(result)

    return results


@click.command()
@click.option('--dataset-dir', default='./data/cifar10')
@click.option('--run-dir', default='./run/')
@click.option('--output-file', default='inference.csv')
@click.option('--start', '-s', default=1)
@click.option('--end', '-e', default=128)
@click.option('--repeat', '-r', default=100)
@click.option('--log2/--no-log2', default=True)
@click.option('--cpu/--no-cpu', default=True)
@click.option('--gpu/--no-gpu', default=True)
@click.option('--append', is_flag=True)
@click.option('--models', '-m', type=click.Choice(MODELS.keys()),
              multiple=True)
def infer(dataset_dir, run_dir, output_file, start, end, repeat, log2,
          cpu, gpu, append, models):

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(MEAN, STD)
    ])

    testset = datasets.CIFAR10(root=dataset_dir, train=False, download=True,
                               transform=transform_test)
    models = models or os.listdir(run_dir)
    output_path = os.path.join(run_dir, output_file)
    assert not os.path.exists(output_path) or append
    for model in models:
        model_dir = os.path.join(run_dir, model)
        paths = glob(f"{model_dir}/*/checkpoint_best_model.t7")
        assert len(paths) > 0
        path = os.path.abspath(paths[0])

        print(f'Model: {model}')
        print(f'Path: {path}')

        if cpu:
            print('With CPU:')
            engine = PyTorchEngine(path, use_cuda=False, name=model)
            infer_cifar10(testset, engine, start=start, end=end, log2=log2,
                          repeat=repeat, output=output_path)

        if gpu and torch.cuda.is_available():
            print('With GPU:')
            engine = PyTorchEngine(path, use_cuda=True, name=model)
            # Warmup
            time_batch_size(testset, 1, engine.pred, engine.use_cuda, repeat=1)

            infer_cifar10(testset, engine, start=start, end=end, log2=log2,
                          repeat=repeat, output=output_path)


if __name__ == '__main__':
    infer()
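The pieces above compose as in the following minimal sketch (not part of the commit); the checkpoint path and model name are placeholders for output produced by `benchmark.train`:

```python
# Hypothetical usage of PyTorchEngine and time_batch_size from benchmark/infer.py.
from torchvision import datasets, transforms

from benchmark.infer import PyTorchEngine, time_batch_size
from benchmark.train import MEAN, STD

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(MEAN, STD)])
testset = datasets.CIFAR10(root='./data/cifar10', train=False, download=True,
                           transform=transform)

# Placeholder path: point this at a checkpoint written by benchmark.train.
engine = PyTorchEngine('./run/resnet20/<timestamp>/checkpoint_best_model.t7',
                       use_cuda=False, name='resnet20')
times = time_batch_size(testset, 8, engine.pred, engine.use_cuda, repeat=10)
print(sum(times) / len(times))  # mean seconds per batch of 8
```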
108  pytorch/CIFAR10/benchmark/models/densenet.py  Normal file
@@ -0,0 +1,108 @@
'''DenseNet in PyTorch.'''
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4 * growth_rate)
        self.conv2 = nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = torch.cat([out, x], 1)
        return out


class Transition(nn.Module):
    def __init__(self, in_planes, out_planes, last=False, pool_size=2):
        super(Transition, self).__init__()
        self.last = last
        self.pool_size = pool_size
        self.bn = nn.BatchNorm2d(in_planes)
        if not self.last:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        out = F.relu(self.bn(x))
        if not self.last:
            out = self.conv(out)
        out = F.avg_pool2d(out, self.pool_size)
        return out


class DenseNet(nn.Module):
    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        # TODO: Add drop for CIFAR10 without data augmentation
        self.growth_rate = growth_rate

        num_planes = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0] * growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1] * growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2] * growth_rate
        self.trans3 = Transition(num_planes, num_planes, last=True, pool_size=8)

        self.linear = nn.Linear(num_planes, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_dense_layers(self, block, in_planes, nblock):
        layers = []
        for i in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def DenseNetBC(L, k):
    assert (L - 4) % 6 == 0
    num_blocks = int((L - 4) / 6)
    return DenseNet(Bottleneck, [num_blocks] * 3, growth_rate=k, reduction=0.5)


def DenseNetBC100():
    return DenseNetBC(100, 12)


def DenseNetBC250():
    return DenseNetBC(250, 24)


def DenseNetBC190():
    return DenseNetBC(190, 40)
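As a cross-check of the depth arithmetic in `DenseNetBC` above: each `Bottleneck` contributes two convolutions, there are three dense blocks of n bottlenecks each, and the stem convolution, the two transition convolutions, and the classifier add four more layers, so L = 6n + 4; hence the assert that (L - 4) % 6 == 0. A small sketch:

```python
# Depth arithmetic behind DenseNetBC (mirrors the assert in the constructor above).
for L, k in [(100, 12), (250, 24), (190, 40)]:
    assert (L - 4) % 6 == 0
    n = (L - 4) // 6  # bottleneck layers per dense block: 16, 41, 31
    print(f'DenseNet-BC (L={L}, k={k}): {n} bottleneck layers per dense block')
```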
372  pytorch/CIFAR10/benchmark/models/resnet.py  Normal file
@@ -0,0 +1,372 @@
import math
from functools import partial

from torch import nn
from torch.nn import functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, 3, stride=stride, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride != 1 or inplanes != (planes * self.expansion):
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplanes, planes * self.expansion, 1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(planes * self.expansion)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, inputs):
        H = self.conv1(inputs)
        H = self.bn1(H)
        H = F.relu(H)

        H = self.conv2(H)
        H = self.bn2(H)

        H += self.shortcut(inputs)
        outputs = F.relu(H)

        return outputs


class PreActBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, planes, 3, stride=stride, padding=1,
                               bias=False)

        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False)

        self.increasing = stride != 1 or inplanes != (planes * self.expansion)
        if self.increasing:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplanes, planes * self.expansion, 1, stride=stride,
                          bias=False)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, inputs):
        H = self.bn1(inputs)
        H = F.relu(H)
        if self.increasing:
            inputs = H
        H = self.conv1(H)

        H = self.bn2(H)
        H = F.relu(H)
        H = self.conv2(H)

        H += self.shortcut(inputs)
        return H


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, 3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, planes * 4, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)

        if stride != 1 or inplanes != (planes * self.expansion):
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplanes, planes * self.expansion, 1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(planes * self.expansion)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, inputs):
        H = self.conv1(inputs)
        H = self.bn1(H)
        H = F.relu(H)

        H = self.conv2(H)
        H = self.bn2(H)
        H = F.relu(H)

        H = self.conv3(H)
        H = self.bn3(H)

        H += self.shortcut(inputs)
        outputs = F.relu(H)

        return outputs


class ResNeXtBottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, cardinality=32,
                 base_width=4):
        super().__init__()

        width = math.floor(planes * (base_width / 64.0))

        self.conv1 = nn.Conv2d(inplanes, width * cardinality, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * cardinality)

        self.conv2 = nn.Conv2d(width * cardinality, width * cardinality, 3,
                               groups=cardinality, padding=1, stride=stride,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(width * cardinality)

        self.conv3 = nn.Conv2d(width * cardinality, planes * 4, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)

        if stride != 1 or inplanes != (planes * self.expansion):
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplanes, planes * self.expansion, 1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(planes * self.expansion)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, inputs):
        H = self.conv1(inputs)
        H = self.bn1(H)
        H = F.relu(H)

        H = self.conv2(H)
        H = self.bn2(H)
        H = F.relu(H)

        H = self.conv3(H)
        H = self.bn3(H)

        H += self.shortcut(inputs)
        outputs = F.relu(H)

        return outputs


class PreActBottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False)

        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, stride=stride,
                               bias=False)

        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, 1, bias=False)

        self.increasing = stride != 1 or inplanes != (planes * self.expansion)
        if self.increasing:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplanes, planes * self.expansion, 1, stride=stride,
                          bias=False)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, inputs):
        H = self.bn1(inputs)
        H = F.relu(H)
        if self.increasing:
            inputs = H
        H = self.conv1(H)

        H = self.bn2(H)
        H = F.relu(H)
        H = self.conv2(H)

        H = self.bn3(H)
        H = F.relu(H)
        H = self.conv3(H)

        H += self.shortcut(inputs)
        return H


class ResNet(nn.Module):

    def __init__(self, Block, layers, filters, num_classes=10, inplanes=None):
        self.inplanes = inplanes or filters[0]
        super().__init__()

        self.pre_act = 'Pre' in Block.__name__

        self.conv1 = nn.Conv2d(3, self.inplanes, 3, padding=1, bias=False)
        if not self.pre_act:
            self.bn1 = nn.BatchNorm2d(self.inplanes)

        self.num_sections = len(layers)
        for section_index, (size, planes) in enumerate(zip(layers, filters)):
            section = []
            for layer_index in range(size):
                if section_index != 0 and layer_index == 0:
                    stride = 2
                else:
                    stride = 1
                section.append(Block(self.inplanes, planes, stride=stride))
                self.inplanes = planes * Block.expansion
            section = nn.Sequential(*section)
            setattr(self, f'section_{section_index}', section)

        if self.pre_act:
            self.bn1 = nn.BatchNorm2d(self.inplanes)

        self.fc = nn.Linear(filters[-1] * Block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, inputs):
        H = self.conv1(inputs)

        if not self.pre_act:
            H = self.bn1(H)
            H = F.relu(H)

        for section_index in range(self.num_sections):
            H = getattr(self, f'section_{section_index}')(H)

        if self.pre_act:
            H = self.bn1(H)
            H = F.relu(H)

        H = F.avg_pool2d(H, H.size()[2:])
        H = H.view(H.size(0), -1)
        outputs = self.fc(H)

        return outputs


# From "Deep Residual Learning for Image Recognition"
def ResNet20():
    return ResNet(BasicBlock, layers=[3] * 3, filters=[16, 32, 64])


def ResNet32():
    return ResNet(BasicBlock, layers=[5] * 3, filters=[16, 32, 64])


def ResNet44():
    return ResNet(BasicBlock, layers=[7] * 3, filters=[16, 32, 64])


def ResNet56():
    return ResNet(BasicBlock, layers=[9] * 3, filters=[16, 32, 64])


def ResNet110():
    return ResNet(BasicBlock, layers=[18] * 3, filters=[16, 32, 64])


def ResNet1202():
    return ResNet(BasicBlock, layers=[200] * 3, filters=[16, 32, 64])


# Based on but not in "Identity Mappings in Deep Residual Networks"
def PreActResNet20():
    return ResNet(PreActBlock, layers=[3] * 3, filters=[16, 32, 64])


def PreActResNet56():
    return ResNet(PreActBlock, layers=[9] * 3, filters=[16, 32, 64])


def PreActResNet164Basic():
    return ResNet(PreActBlock, layers=[27] * 3, filters=[16, 32, 64])


# From "Identity Mappings in Deep Residual Networks"
def PreActResNet110():
    return ResNet(PreActBlock, layers=[18] * 3, filters=[16, 32, 64])


def PreActResNet164():
    return ResNet(PreActBottleneck, layers=[18] * 3, filters=[16, 32, 64])


def PreActResNet1001():
    return ResNet(PreActBottleneck, layers=[111] * 3, filters=[16, 32, 64])


# From "Wide Residual Networks"
def WRN(n, k):
    assert (n - 4) % 6 == 0
    base_filters = [16, 32, 64]
    filters = [num_filters * k for num_filters in base_filters]
    d = (n - 4) / 2  # l = 2
    return ResNet(PreActBlock, layers=[int(d / 3)] * 3, filters=filters,
                  inplanes=16)


def WRN_40_4():
    return WRN(40, 4)


def WRN_16_8():
    return WRN(16, 8)


def WRN_28_10():
    return WRN(28, 10)


# From "Aggregated Residual Transformations for Deep Neural Networks"
def ResNeXt29(cardinality, base_width):
    Block = partial(ResNeXtBottleneck, cardinality=cardinality,
                    base_width=base_width)
    Block.__name__ = ResNeXtBottleneck.__name__
    Block.expansion = ResNeXtBottleneck.expansion
    return ResNet(Block, layers=[3, 3, 3], filters=[64, 128, 256])


# From kuangliu/pytorch-cifar
def ResNet18():
    return ResNet(BasicBlock, layers=[2, 2, 2, 2], filters=[64, 128, 256, 512])


def ResNet34():
    return ResNet(BasicBlock, layers=[3, 4, 6, 3], filters=[64, 128, 256, 512])


def ResNet50():
    return ResNet(Bottleneck, layers=[3, 4, 6, 3], filters=[64, 128, 256, 512])


def ResNet101():
    return ResNet(Bottleneck,
                  layers=[3, 4, 23, 3], filters=[64, 128, 256, 512])


def ResNet152():
    return ResNet(Bottleneck,
                  layers=[3, 8, 36, 3], filters=[64, 128, 256, 512])
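A quick shape sanity check for these constructors (a sketch, assuming a recent PyTorch where a model can be called on a plain tensor; the commit itself targets the older `Variable` API):

```python
import torch

from benchmark.models.resnet import ResNet20

model = ResNet20()
x = torch.randn(4, 3, 32, 32)   # a batch of four CIFAR-sized images
with torch.no_grad():
    y = model(x)
print(y.shape)                  # expected: torch.Size([4, 10])
```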
336  pytorch/CIFAR10/benchmark/train.py  Normal file
@@ -0,0 +1,336 @@
import os
import re
import json
from functools import reduce
from datetime import datetime
from collections import OrderedDict

import click
import torch
import progressbar
from torch import nn, optim
from torch.autograd import Variable
from torchvision import transforms
from torchvision import datasets as dset

from benchmark.models import resnet, densenet

MEAN = (0.4914, 0.4822, 0.4465)
STD = (0.2023, 0.1994, 0.2010)

MODELS = {
    # "Deep Residual Learning for Image Recognition"
    'resnet20': resnet.ResNet20,
    'resnet32': resnet.ResNet32,
    'resnet44': resnet.ResNet44,
    'resnet56': resnet.ResNet56,
    'resnet110': resnet.ResNet110,
    'resnet1202': resnet.ResNet1202,

    # "Wide Residual Networks"
    'wrn-40-4': resnet.WRN_40_4,
    'wrn-16-8': resnet.WRN_16_8,
    'wrn-28-10': resnet.WRN_28_10,

    # Based on "Identity Mappings in Deep Residual Networks"
    'preact20': resnet.PreActResNet20,
    'preact56': resnet.PreActResNet56,
    'preact164-basic': resnet.PreActResNet164Basic,

    # "Identity Mappings in Deep Residual Networks"
    'preact110': resnet.PreActResNet110,
    'preact164': resnet.PreActResNet164,
    'preact1001': resnet.PreActResNet1001,

    # "Aggregated Residual Transformations for Deep Neural Networks"
    'resnext29-8-64': lambda _=None: resnet.ResNeXt29(8, 64),
    'resnext29-16-64': lambda _=None: resnet.ResNeXt29(16, 64),

    # "Densely Connected Convolutional Networks"
    'densenetbc100': densenet.DenseNetBC100,
    'densenetbc250': densenet.DenseNetBC250,
    'densenetbc190': densenet.DenseNetBC190,

    # Kuangliu/pytorch-cifar
    'resnet18': resnet.ResNet18,
    'resnet50': resnet.ResNet50,
    'resnet101': resnet.ResNet101,
    'resnet152': resnet.ResNet152,
}


def count_parameters(model):
    c = map(lambda p: reduce(lambda x, y: x * y, p.size()), model.parameters())
    return sum(c)


def correct(outputs, targets, top=(1, )):
    _, predictions = outputs.topk(max(top), dim=1, largest=True, sorted=True)
    targets = targets.view(-1, 1).expand_as(predictions)
    corrects = predictions.eq(targets).cpu().cumsum(1).sum(0)
    tops = list(map(lambda k: corrects.data[0][k - 1], top))
    return tops


def save_result(result, path):
    write_heading = not os.path.exists(path)
    with open(path, mode='a') as out:
        if write_heading:
            out.write(",".join([str(k) for k, v in result.items()]) + '\n')
        out.write(",".join([str(v) for k, v in result.items()]) + '\n')


def run(epoch, model, loader, criterion=None, optimizer=None, top=(1, 5),
        use_cuda=False, tracking=None, max_value=None, train=True):

    assert criterion is not None or not train, 'Need criterion to train model'
    assert optimizer is not None or not train, 'Need optimizer to train model'
    max_value = max_value or progressbar.UnknownLength
    bar = progressbar.ProgressBar(max_value=max_value)
    total = 0
    correct_counts = {}
    if train:
        model.train()
    else:
        model.eval()

    start = datetime.now()
    for batch_index, (inputs, targets) in enumerate(loader):
        inputs = Variable(inputs, requires_grad=False, volatile=not train)
        targets = Variable(targets, requires_grad=False, volatile=not train)

        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()

        outputs = model(inputs)

        if train:
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        _, predictions = torch.max(outputs.data, 1)
        batch_size = targets.size(0)
        top_correct = correct(outputs, targets, top=top)
        total += batch_size
        for k, count in zip(top, top_correct):
            correct_counts[k] = correct_counts.get(k, 0) + count

        end = datetime.now()
        if tracking is not None:
            result = OrderedDict()
            result['timestamp'] = datetime.now()
            result['batch_duration'] = end - start
            result['epoch'] = epoch
            result['batch'] = batch_index
            result['batch_size'] = batch_size
            for i, k in enumerate(top):
                result['top{}_correct'.format(k)] = top_correct[i]
            if train:
                result['loss'] = loss.data[0]
            save_result(result, tracking)

        bar.update(batch_index + 1)
        start = datetime.now()

    print()
    if train:
        message = 'Training accuracy of'
    else:
        message = 'Test accuracy of'
    for k in top:
        accuracy = correct_counts[k] / total
        message += ' top-{}: {}'.format(k, accuracy)
    print(message)
    return (1. * correct_counts[top[0]]) / total, batch_index + 1


def save(model, directory, epoch, accuracy, use_cuda=False, filename=None):
    state = {
        'model': model.module if use_cuda else model,
        'epoch': epoch,
        'accuracy': accuracy
    }

    filename = filename or 'checkpoint_{}.t7'.format(epoch)
    torch.save(state, os.path.join(directory, filename))


def save_config(config, run_dir):
    path = os.path.join(run_dir, "config_{}.json".format(config['timestamp']))
    with open(path, 'w') as config_file:
        json.dump(config, config_file)
        config_file.write('\n')


def load(path):
    assert os.path.exists(path)
    state = torch.load(path)
    model = state['model']
    epoch = state['epoch']
    accuracy = state['accuracy']
    return model, epoch, accuracy


def latest_file(model):
    restore = f'./run/{model}'
    timestamps = sorted(os.listdir(restore))
    assert len(timestamps) > 0
    run_dir = os.path.join(restore, timestamps[-1])
    files = os.listdir(run_dir)
    max_checkpoint = -1
    for filename in files:
        if re.search('checkpoint_\d+.t7', filename):
            num = int(re.search('\d+', filename).group())

            if num > max_checkpoint:
                max_checkpoint = num
                max_checkpoint_file = filename

    assert max_checkpoint != -1
    return os.path.join(run_dir, max_checkpoint_file)


@click.command()
@click.option('--dataset-dir', default='./data/cifar10')
@click.option('--checkpoint', '-c', type=click.Choice(['best', 'all', 'last']),
              default='last')
@click.option('--restore', '-r')
@click.option('--tracking/--no-tracking', default=True)
@click.option('--cuda/--no-cuda', default=True)
@click.option('--epochs', '-e', default=200)
@click.option('--batch-size', '-b', default=32)
@click.option('--learning-rate', '-l', default=1e-3)
@click.option('--sgd', 'optimizer', flag_value='sgd')
@click.option('--adam', 'optimizer', flag_value='adam', default=True)
@click.option('--augmentation/--no-augmentation', default=True)
@click.option('--num-workers', type=int)
@click.option('--weight-decay', default=5e-4)
@click.option('--model', '-m', type=click.Choice(MODELS.keys()),
              default='resnet20')
def main(dataset_dir, checkpoint, restore, tracking, cuda, epochs,
         batch_size, learning_rate, optimizer, augmentation, num_workers,
         weight_decay, model):
    timestamp = "{:.0f}".format(datetime.utcnow().timestamp())
    config = {k: v for k, v in locals().items()}

    use_cuda = cuda and torch.cuda.is_available()
    if use_cuda:
        num_workers = num_workers or torch.cuda.device_count()
    else:
        num_workers = num_workers or 1

    print(f"using {num_workers} workers for data loading")

    print("Preparing data:")

    if augmentation:
        transform_train = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip()
        ]
    else:
        transform_train = []

    transform_train = transforms.Compose(transform_train + [
        transforms.ToTensor(),
        transforms.Normalize(MEAN, STD),
    ])

    trainset = dset.CIFAR10(root=dataset_dir, train=True, download=True,
                            transform=transform_train)
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
        pin_memory=use_cuda)

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(MEAN, STD),
    ])

    testset = dset.CIFAR10(root=dataset_dir, train=False, download=True,
                           transform=transform_test)
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
        pin_memory=use_cuda)

    if restore is not None:
        if restore == 'latest':
            restore = latest_file(model)
        print(f'Restoring model from {restore}')
        model, start_epoch, best_accuracy = load(restore)
        start_epoch += 1
        print('Starting accuracy is {}'.format(best_accuracy))
        run_dir = os.path.split(restore)[0]
    else:
        print(f'Building {model} model')
        best_accuracy = -1
        start_epoch = 1
        run_dir = f"./run/{model}/{timestamp}"
        model = MODELS[model]()

    if not os.path.exists(run_dir):
        os.makedirs(run_dir)
    save_config(config, run_dir)

    print(model)
    print("{} parameters".format(count_parameters(model)))
    print(f"Run directory set to {run_dir}")

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    if tracking:
        train_results_file = os.path.join(run_dir, 'train_results.csv')
        test_results_file = os.path.join(run_dir, 'test_results.csv')
    else:
        train_results_file = None
        test_results_file = None

    if use_cuda:
        print('Copying model to GPU')
        model.cuda()
        model = torch.nn.DataParallel(
            model, device_ids=range(torch.cuda.device_count()))
    criterion = nn.CrossEntropyLoss()

    # Other parameters?
    if optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                              momentum=0.9,
                              weight_decay=weight_decay)
    else:
        raise NotImplementedError("Unknown optimizer: {}".format(optimizer))

    train_max_value = None
    test_max_value = None
    end_epoch = start_epoch + epochs
    for epoch in range(start_epoch, end_epoch):
        print('Epoch {} of {}'.format(epoch, end_epoch - 1))
        train_acc, train_max_value = run(epoch, model, train_loader, criterion,
                                         optimizer, use_cuda=use_cuda,
                                         tracking=train_results_file,
                                         max_value=train_max_value, train=True)

        test_acc, test_max_value = run(epoch, model, test_loader,
                                       use_cuda=use_cuda,
                                       tracking=test_results_file, train=False)

        if test_acc > best_accuracy:
            print('New best model!')
            save(model, run_dir, epoch, test_acc, use_cuda=use_cuda,
                 filename='checkpoint_best_model.t7')
            best_accuracy = test_acc

        last_epoch = epoch == (end_epoch - 1)
        if checkpoint == 'all' or (checkpoint == 'last' and last_epoch):
            save(model, run_dir, epoch, test_acc, use_cuda=use_cuda)


if __name__ == '__main__':
    main()
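Since `MODELS` and `count_parameters` are both defined in this file, parameter counts for every registered architecture can be listed in a few lines; a sketch (building the very deep variants such as resnet1202 or preact1001 takes a while):

```python
from benchmark.train import MODELS, count_parameters

# Instantiate each registered model and report its parameter count.
for name, build in sorted(MODELS.items()):
    model = build()
    print(f'{name}: {count_parameters(model)} parameters')
```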
20  pytorch/CIFAR10/setup.py  Normal file
@@ -0,0 +1,20 @@
from setuptools import setup

setup(
    name='benchmark',
    version='0.0.0',
    url='http://www.codycoleman.com',
    author='Cody Austun Coleman',
    author_email='cody.coleman@cs.stanford.edu',
    packages=['benchmark'],
    entry_points={
        'console_scripts': [
            'bench = benchmark.train:main'
        ]
    },
    install_requires=[
        'torchvision',
        'click',
        'progressbar2'
    ]
)
18  tensorflow/CIFAR10/README.md  Normal file
@@ -0,0 +1,18 @@
# ResNets on TensorFlow

To train a ResNet, run:

```bash
python3 resnet/resnet_main.py --train_data_path=cifar10/data_batch* --log_root=data/resnet20/log_root \
    --train_dir=data/resnet20/log_root/train --dataset='cifar10' --model=resnet20 \
    --num_gpus=1 --checkpoint_dir=data/resnet20/checkpoints --data_format=NCHW
```

To evaluate the resulting checkpoints, run:

```bash
python3 eval_checkpoints.py -i data/resnet20/checkpoints \
    -c "python3 resnet/resnet_main.py --mode=eval --eval_data_path=cifar10/test_batch.bin --eval_dir=data/resnet20/log_root/eval --dataset='cifar10' --model=resnet20 --num_gpus=1 --eval_batch_count=100 --eval_once=True --data_format=NCHW"
```

Make sure to first follow the instructions in `resnet/README.md` to get the necessary data, etc.
59  tensorflow/CIFAR10/eval_checkpoints.py  Normal file
@@ -0,0 +1,59 @@
import argparse
import os
import subprocess
import sys

def main(checkpoints_path, command, start_cnt):
    cnt = start_cnt

    times = {}
    cum_time = 0.0
    with open(os.path.join(checkpoints_path, "times.log"), 'r') as f:
        output = f.read().strip()
        output_lines = output.split('\n')
        for output_line in output_lines:
            [step, time] = output_line.split('\t')
            step = int(step.split(': ')[1])
            time = float(time.split(': ')[1])
            cum_time += time
            times[step] = cum_time

    print("Time (in secs)\tNumber of minibatches\tTop 1 accuracy\tTop 5 accuracy")
    while True:
        ckpt_path = ("%5d" % cnt).replace(' ', '0')
        full_ckpt_path = os.path.join(checkpoints_path, ckpt_path)
        if not os.path.exists(full_ckpt_path):
            break
        if len(os.listdir(full_ckpt_path)) <= 2:
            cnt += 1
            continue
        full_command = command + " --log_root=%s 2>/dev/null" % full_ckpt_path
        output = subprocess.check_output(full_command, shell=True)
        output = output.decode('utf8').strip()
        for line in output.split('\n'):
            if "Precision" in line and "Recall" in line:
                tokens = line.split(", ")  # TODO: Nasty hack, make more robust.
                precision_at_1 = float(tokens[0].split()[-1])
                recall_at_5 = float(tokens[1].split()[-1])
                step = int(tokens[2].split()[3])
                stats = [times[step], step, precision_at_1, recall_at_5]
                print("\t".join([str(stat) for stat in stats]))
        sys.stdout.flush()
        cnt += 1


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=("Backup model checkpoints periodically")
    )
    parser.add_argument('-i', "--checkpoints_path", type=str, required=True,
                        help="Path to dumped model checkpoints")
    parser.add_argument('-c', "--command", type=str, required=True,
                        help="Command to evaluate each individual checkpoint")
    parser.add_argument('-s', "--start_cnt", type=int, default=1,
                        help="Count to start evaluating checkpoints from")

    cmdline_args = parser.parse_args()
    opt_dict = vars(cmdline_args)

    main(opt_dict["checkpoints_path"], opt_dict["command"], opt_dict["start_cnt"])
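The `times.log` parsed by `main` above is the one written by the `_SaverHook` in `resnet/resnet_main.py`, one `Step: <int>\tTime: <float>` line per saved epoch. A minimal sketch of that parsing step, with illustrative values:

```python
# Parse a single times.log line the same way main() does.
sample = "Step: 391\tTime: 118.7"           # illustrative values, not real measurements
step_field, time_field = sample.split('\t')
step = int(step_field.split(': ')[1])        # 391
seconds = float(time_field.split(': ')[1])   # 118.7
print(step, seconds)
```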
2205  tensorflow/CIFAR10/logs/16vCPUs_gc/resnet164_b_train.log  Normal file  (diff suppressed because it is too large)
2145  tensorflow/CIFAR10/logs/16vCPUs_gc/resnet164_nb_train.log  Normal file  (diff suppressed because it is too large)
1713  tensorflow/CIFAR10/logs/16vCPUs_gc/resnet20_train.log  Normal file  (diff suppressed because it is too large)
1821  tensorflow/CIFAR10/logs/16vCPUs_gc/resnet56_train.log  Normal file  (diff suppressed because it is too large)
2222  tensorflow/CIFAR10/logs/1k80_ec2/resnet164_b_train.log  Normal file  (diff suppressed because it is too large)
2162  tensorflow/CIFAR10/logs/1k80_ec2/resnet164_nb_train.log  Normal file  (diff suppressed because it is too large)
1728  tensorflow/CIFAR10/logs/1k80_ec2/resnet20_train.log  Normal file  (diff suppressed because it is too large)
1836  tensorflow/CIFAR10/logs/1k80_ec2/resnet56_train.log  Normal file  (diff suppressed because it is too large)
2222  tensorflow/CIFAR10/logs/1k80_gc/resnet164_b_train.log  Normal file  (diff suppressed because it is too large)
2162  tensorflow/CIFAR10/logs/1k80_gc/resnet164_nb_train.log  Normal file  (diff suppressed because it is too large)
1728  tensorflow/CIFAR10/logs/1k80_gc/resnet20_train.log  Normal file  (diff suppressed because it is too large)
1836  tensorflow/CIFAR10/logs/1k80_gc/resnet56_train.log  Normal file  (diff suppressed because it is too large)
2221  tensorflow/CIFAR10/logs/1p100_dawn/resnet164_b_train.log  Normal file  (diff suppressed because it is too large)
2161  tensorflow/CIFAR10/logs/1p100_dawn/resnet164_nb_train.log  Normal file  (diff suppressed because it is too large)
1727  tensorflow/CIFAR10/logs/1p100_dawn/resnet20_train.log  Normal file  (diff suppressed because it is too large)
1835  tensorflow/CIFAR10/logs/1p100_dawn/resnet56_train.log  Normal file  (diff suppressed because it is too large)
88  tensorflow/CIFAR10/resnet/README.md  Normal file
@@ -0,0 +1,88 @@
# ResNet on CIFAR10 and CIFAR100

(Borrowed from the tensorflow/models repository)

## Dataset

https://www.cs.toronto.edu/~kriz/cifar.html

## Related papers

- [Identity Mappings in Deep Residual Networks](https://arxiv.org/pdf/1603.05027v2.pdf)
- [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385v1.pdf)
- [Wide Residual Networks](https://arxiv.org/pdf/1605.07146v1.pdf)

## Setting

* Pad to 36x36 and random crop. Horizontal flip. Per-image whitening.
* Momentum optimizer (momentum = 0.9).
* Learning rate schedule: 0.01 (1 epoch), 0.1 (90 epochs), 0.01 (45 epochs), 0.001 (45 epochs).
* L2 weight decay: 0.005.
* Batch size: 128. (28-10 wide and 1001 layer bottleneck use 64)

## Results

CIFAR-10 Model|Best Precision|Steps
--------------|--------------|------
32 layer|92.5%|~80k
110 layer|93.6%|~80k
164 layer bottleneck|94.5%|~80k
1001 layer bottleneck|94.9%|~80k
28-10 wide|95%|~90k

CIFAR-100 Model|Best Precision|Steps
---------------|--------------|-----
32 layer|68.1%|~45k
110 layer|71.3%|~60k
164 layer bottleneck|75.7%|~50k
1001 layer bottleneck|78.2%|~70k
28-10 wide|78.3%|~70k

## Prerequisites

1. Install TensorFlow 1.2 (preferably from source for higher performance) and Python 3.6.2.

2. Download the CIFAR-10/CIFAR-100 dataset.

```shell
curl -o cifar-10-binary.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
curl -o cifar-100-binary.tar.gz https://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz
```

## How to run

```shell
# cd to the models repository and run with bash. Expected command output shown.
# The directory should contain an empty WORKSPACE file, the resnet code, and the cifar10 dataset.
# Note: The user can split 5k from the train set for an eval set.
$ ls -R
.:
cifar10  resnet  WORKSPACE

./cifar10:
data_batch_1.bin  data_batch_2.bin  data_batch_3.bin  data_batch_4.bin
data_batch_5.bin  test_batch.bin

./resnet:
cifar_input.py  README.md  resnet_main.py  resnet_model.py

# Train the model.
$ python3 resnet/resnet_main.py --train_data_path=cifar10/data_batch* \
    --log_root=/tmp/resnet_model \
    --train_dir=/tmp/resnet_model/train \
    --dataset='cifar10' \
    --num_gpus=1

# While the model is training, you can also check on its progress using tensorboard:
$ tensorboard --logdir=/tmp/resnet_model

# Evaluate the model.
# Avoid running on the same GPU as the training job at the same time,
# otherwise, you might run out of memory.
$ python3 resnet/resnet_main.py --eval_data_path=cifar10/test_batch.bin \
    --log_root=/tmp/resnet_model \
    --eval_dir=/tmp/resnet_model/test \
    --mode=eval \
    --dataset='cifar10' \
    --num_gpus=0
```
121  tensorflow/CIFAR10/resnet/cifar_input.py  Normal file
@@ -0,0 +1,121 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""CIFAR dataset input module.
"""

import tensorflow as tf

def build_input(dataset, data_path, batch_size, mode, data_format):
  """Build CIFAR image and labels.

  Args:
    dataset: Either 'cifar10' or 'cifar100'.
    data_path: Filename for data.
    batch_size: Input batch size.
    mode: Either 'train' or 'eval'.
    data_format: Either 'NCHW' or 'NHWC'.
  Returns:
    images: Batches of images. [batch_size, image_size, image_size, 3]
    labels: Batches of labels. [batch_size, num_classes]
  Raises:
    ValueError: when the specified dataset is not supported.
  """
  with tf.device('/cpu:0'):
    image_size = 32
    if dataset == 'cifar10':
      label_bytes = 1
      label_offset = 0
      num_classes = 10
    elif dataset == 'cifar100':
      label_bytes = 1
      label_offset = 1
      num_classes = 100
    else:
      raise ValueError('Not supported dataset %s', dataset)

    depth = 3
    image_bytes = image_size * image_size * depth
    record_bytes = label_bytes + label_offset + image_bytes

    data_files = tf.gfile.Glob(data_path)
    file_queue = tf.train.string_input_producer(data_files, shuffle=True)
    # Read examples from files in the filename queue.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, value = reader.read(file_queue)

    # Convert these examples to dense labels and processed images.
    record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
    label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
    # Convert from string to [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(tf.slice(record, [label_bytes], [image_bytes]),
                             [depth, image_size, image_size])
    # Convert from [depth, height, width] to [height, width, depth].
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

    if mode == 'train':
      image = tf.image.resize_image_with_crop_or_pad(
          image, image_size+4, image_size+4)
      image = tf.random_crop(image, [image_size, image_size, 3])
      image = tf.image.random_flip_left_right(image)
      # Brightness/saturation/contrast provides small gains .2%~.5% on cifar.
      # image = tf.image.random_brightness(image, max_delta=63. / 255.)
      # image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      # image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
      image = tf.image.per_image_standardization(image)

      example_queue = tf.RandomShuffleQueue(
          capacity=16 * batch_size,
          min_after_dequeue=8 * batch_size,
          dtypes=[tf.float32, tf.int32],
          shapes=[[image_size, image_size, depth], [1]])
      num_threads = 16
    else:
      image = tf.image.resize_image_with_crop_or_pad(
          image, image_size, image_size)
      image = tf.image.per_image_standardization(image)

      example_queue = tf.FIFOQueue(
          3 * batch_size,
          dtypes=[tf.float32, tf.int32],
          shapes=[[image_size, image_size, depth], [1]])
      num_threads = 1

    example_enqueue_op = example_queue.enqueue([image, label])
    tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
        example_queue, [example_enqueue_op] * num_threads))

    # Read 'batch' labels + images from the example queue.
    images, labels = example_queue.dequeue_many(batch_size)
    labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    labels = tf.sparse_to_dense(
        tf.concat(values=[indices, labels], axis=1),
        [batch_size, num_classes], 1.0, 0.0)

    if data_format == 'NCHW':
      images = tf.transpose(images, [0, 3, 1, 2])

    assert len(images.get_shape()) == 4
    assert images.get_shape()[0] == batch_size
    if data_format == 'NCHW':
      assert images.get_shape()[1] == 3
    else:
      assert images.get_shape()[-1] == 3
    assert len(labels.get_shape()) == 2
    assert labels.get_shape()[0] == batch_size
    assert labels.get_shape()[1] == num_classes

    return images, labels
302
tensorflow/CIFAR10/resnet/resnet_main.py
Normal file
302
tensorflow/CIFAR10/resnet/resnet_main.py
Normal file
|
@ -0,0 +1,302 @@
|
|||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""ResNet Train/Eval module.
|
||||
"""
|
||||
import os
|
||||
import six
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
import cifar_input
|
||||
import numpy as np
|
||||
import resnet_model
|
||||
import tensorflow as tf
|
||||
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
tf.app.flags.DEFINE_string('dataset', 'cifar10', 'cifar10 or cifar100.')
|
||||
tf.app.flags.DEFINE_string('mode', 'train', 'train or eval.')
|
||||
tf.app.flags.DEFINE_string('model', '', 'model to train.')
|
||||
tf.app.flags.DEFINE_string('data_format', 'NHWC',
|
||||
"""Data layout to use: NHWC (TF native)
|
||||
or NCHW (cuDNN native).""")
|
||||
tf.app.flags.DEFINE_string('train_data_path', '',
|
||||
'Filepattern for training data.')
|
||||
tf.app.flags.DEFINE_string('eval_data_path', '',
|
||||
'Filepattern for eval data')
|
||||
tf.app.flags.DEFINE_integer('image_size', 32, 'Image side length.')
|
||||
tf.app.flags.DEFINE_string('train_dir', '',
|
||||
'Directory to keep training outputs.')
|
||||
tf.app.flags.DEFINE_string('eval_dir', '',
|
||||
'Directory to keep eval outputs.')
|
||||
tf.app.flags.DEFINE_integer('eval_batch_count', 50,
|
||||
'Number of batches to eval.')
|
||||
tf.app.flags.DEFINE_bool('eval_once', False,
|
||||
'Whether evaluate the model only once.')
|
||||
tf.app.flags.DEFINE_string('log_root', '',
|
||||
'Should be a parent directory of FLAGS.train_dir/eval_dir.')
|
||||
tf.app.flags.DEFINE_string('checkpoint_dir', '',
|
||||
'Directory to store the checkpoints')
|
||||
tf.app.flags.DEFINE_integer('num_gpus', 0,
|
||||
'Number of gpus used for training. (0 or 1)')
|
||||
tf.app.flags.DEFINE_bool('use_bottleneck', False,
|
||||
'Use bottleneck module or not.')
|
||||
tf.app.flags.DEFINE_bool('time_inference', False,
|
||||
'Time inference.')
|
||||
tf.app.flags.DEFINE_integer('batch_size', -1,
|
||||
'Batch size to use.')
|
||||
|
||||
|
||||
def train(hps):
|
||||
"""Training loop."""
|
||||
images, labels = cifar_input.build_input(
|
||||
FLAGS.dataset, FLAGS.train_data_path, hps.batch_size, FLAGS.mode, hps.data_format)
|
||||
model = resnet_model.ResNet(hps, images, labels, FLAGS.mode)
|
||||
model.build_graph()
|
||||
|
||||
param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
|
||||
tf.get_default_graph(),
|
||||
tfprof_options=tf.contrib.tfprof.model_analyzer.
|
||||
TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
|
||||
sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
|
||||
|
||||
tf.contrib.tfprof.model_analyzer.print_model_analysis(
|
||||
tf.get_default_graph(),
|
||||
tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
|
||||
|
||||
truth = tf.argmax(model.labels, axis=1)
|
||||
predictions = tf.argmax(model.predictions, axis=1)
|
||||
precision = tf.reduce_mean(tf.to_float(tf.equal(predictions, truth)))
|
||||
|
||||
summary_hook = tf.train.SummarySaverHook(
|
||||
save_steps=100,
|
||||
output_dir=FLAGS.train_dir,
|
||||
summary_op=tf.summary.merge([model.summaries,
|
||||
tf.summary.scalar('Precision', precision)]))
|
||||
|
||||
num_steps_per_epoch = 391 # TODO: Don't hardcode this.
|
||||
|
||||
logging_hook = tf.train.LoggingTensorHook(
|
||||
tensors={'step': model.global_step,
|
||||
'loss': model.cost,
|
||||
'precision': precision},
|
||||
every_n_iter=100)
|
||||
|
||||
class _LearningRateSetterHook(tf.train.SessionRunHook):
|
||||
"""Sets learning_rate based on global step."""
|
||||
|
||||
def begin(self):
|
||||
self._lrn_rate = 0.01
|
||||
|
||||
def before_run(self, run_context):
|
||||
return tf.train.SessionRunArgs(
|
||||
model.global_step, # Asks for global step value.
|
||||
feed_dict={model.lrn_rate: self._lrn_rate}) # Sets learning rate
|
||||
|
||||
def after_run(self, run_context, run_values):
|
||||
train_step = run_values.results
|
||||
if train_step < num_steps_per_epoch:
|
||||
self._lrn_rate = 0.01
|
||||
elif train_step < (91 * num_steps_per_epoch):
|
||||
self._lrn_rate = 0.1
|
||||
elif train_step < (136 * num_steps_per_epoch):
|
||||
self._lrn_rate = 0.01
|
||||
elif train_step < (181 * num_steps_per_epoch):
|
||||
self._lrn_rate = 0.001
|
||||
else:
|
||||
self._lrn_rate = 0.0001
|
||||
|
||||
class _SaverHook(tf.train.SessionRunHook):
|
||||
"""Sets learning_rate based on global step."""
|
||||
|
||||
def begin(self):
|
||||
self.saver = tf.train.Saver(max_to_keep=10000)
|
||||
subprocess.call("rm -rf %s; mkdir -p %s" % (FLAGS.checkpoint_dir,
|
||||
FLAGS.checkpoint_dir), shell=True)
|
||||
self.f = open(os.path.join(FLAGS.checkpoint_dir, "times.log"), 'w')
|
||||
|
||||
def after_create_session(self, sess, coord):
|
||||
self.sess = sess
|
||||
self.start_time = time.time()
|
||||
|
||||
def before_run(self, run_context):
|
||||
return tf.train.SessionRunArgs(
|
||||
model.global_step # Asks for global step value.
|
||||
)
|
||||
|
||||
def after_run(self, run_context, run_values):
|
||||
train_step = run_values.results
|
||||
epoch = train_step / num_steps_per_epoch
|
||||
if train_step % num_steps_per_epoch == 0:
|
||||
end_time = time.time()
|
||||
directory = os.path.join(FLAGS.checkpoint_dir, ("%5d" % epoch).replace(' ', '0'))
|
||||
subprocess.call("mkdir -p %s" % directory, shell=True)
|
||||
ckpt_name = 'model.ckpt'
|
||||
self.saver.save(self.sess, os.path.join(directory, ckpt_name),
|
||||
global_step=train_step)
|
||||
self.f.write("Step: %d\tTime: %s\n" % (train_step, end_time - self.start_time))
|
||||
print("Saved checkpoint after %d epoch(s) to %s..." % (epoch, directory))
|
||||
sys.stdout.flush()
|
||||
self.start_time = time.time()
|
||||
|
||||
def end(self, sess):
|
||||
self.f.close()
|
||||
|
||||
with tf.train.MonitoredTrainingSession(
|
||||
checkpoint_dir=FLAGS.log_root,
|
||||
hooks=[logging_hook, _LearningRateSetterHook()],
|
||||
chief_only_hooks=[summary_hook, _SaverHook()],
|
||||
save_checkpoint_secs=None,
|
||||
# Since we provide a SummarySaverHook, we need to disable default
|
||||
# SummarySaverHook. To do that we set save_summaries_steps to 0.
|
||||
save_summaries_steps=None,
|
||||
save_summaries_secs=None,
|
||||
config=tf.ConfigProto(allow_soft_placement=True)) as mon_sess:
|
||||
for i in range(num_steps_per_epoch * 181):
|
||||
mon_sess.run(model.train_op)
|
||||
|
||||
def evaluate(hps):
|
||||
"""Eval loop."""
|
||||
images, labels = cifar_input.build_input(
|
||||
FLAGS.dataset, FLAGS.eval_data_path, hps.batch_size, FLAGS.mode, hps.data_format)
|
||||
model = resnet_model.ResNet(hps, images, labels, FLAGS.mode)
|
||||
model.build_graph()
|
||||
saver = tf.train.Saver()
|
||||
summary_writer = tf.summary.FileWriter(FLAGS.eval_dir)
|
||||
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
tf.train.start_queue_runners(sess)
|
||||
|
||||
best_precision = 0.0
|
||||
while True:
|
||||
try:
|
||||
ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
|
||||
except tf.errors.OutOfRangeError as e:
|
||||
tf.logging.error('Cannot restore checkpoint: %s', e)
|
||||
continue
|
||||
if not (ckpt_state and ckpt_state.model_checkpoint_path):
|
||||
tf.logging.info('No model to eval yet at %s', FLAGS.log_root)
|
||||
break
|
||||
tf.logging.info('Loading checkpoint %s', ckpt_state.model_checkpoint_path)
|
||||
saver.restore(sess, ckpt_state.model_checkpoint_path)
|
||||
|
||||
global_step = ckpt_state.model_checkpoint_path.split('/')[-1].split('-')[-1]
|
||||
if not global_step.isdigit():
|
||||
global_step = 0
|
||||
else:
|
||||
global_step = int(global_step)
|
||||
|
||||
total_prediction, correct_prediction, correct_prediction_top5 = 0, 0, 0
|
||||
start_time = time.time()
|
||||
for _ in six.moves.range(FLAGS.eval_batch_count):
|
||||
(summaries, loss, predictions, truth, train_step) = sess.run(
|
||||
[model.summaries, model.cost, model.predictions,
|
||||
model.labels, model.global_step])
|
||||
|
||||
if not FLAGS.time_inference:
|
||||
for (indiv_truth, indiv_prediction) in zip(truth, predictions):
|
||||
indiv_truth = np.argmax(indiv_truth)
|
||||
top5_prediction = np.argsort(indiv_prediction)[-5:]
|
||||
top1_prediction = np.argsort(indiv_prediction)[-1]
|
||||
correct_prediction += (indiv_truth == top1_prediction)
|
||||
if indiv_truth in top5_prediction:
|
||||
correct_prediction_top5 += 1
|
||||
total_prediction += 1
|
||||
|
||||
if FLAGS.time_inference:
|
||||
print("Time for inference: %.4f" % (time.time() - start_time))
|
||||
else:
|
||||
precision = 1.0 * correct_prediction / total_prediction
|
||||
precision_top5 = 1.0 * correct_prediction_top5 / total_prediction
|
||||
best_precision = max(precision, best_precision)
|
||||
|
||||
precision_summ = tf.Summary()
|
||||
precision_summ.value.add(
|
||||
tag='Precision', simple_value=precision)
|
||||
summary_writer.add_summary(precision_summ, train_step)
|
||||
best_precision_summ = tf.Summary()
|
||||
best_precision_summ.value.add(
|
||||
tag='Best Precision', simple_value=best_precision)
|
||||
summary_writer.add_summary(best_precision_summ, train_step)
|
||||
summary_writer.add_summary(summaries, train_step)
|
||||
print('Precision @ 1 = %.4f, Precision @ 5 = %.4f, Global step = %d' %
|
||||
(precision, precision_top5, global_step))
|
||||
summary_writer.flush()
|
||||
|
||||
if FLAGS.eval_once:
|
||||
break
|
||||
|
||||
time.sleep(60)
|
||||
|
||||
|
||||
def main(_):
|
||||
if FLAGS.model == '':
|
||||
raise Exception('--model must be specified.')
|
||||
|
||||
if FLAGS.num_gpus == 0:
|
||||
dev = '/cpu:0'
|
||||
elif FLAGS.num_gpus == 1:
|
||||
dev = '/gpu:0'
|
||||
else:
|
||||
raise ValueError('Only support 0 or 1 gpu.')
|
||||
|
||||
if FLAGS.batch_size == -1:
|
||||
if FLAGS.mode == 'train':
|
||||
batch_size = 128
|
||||
elif FLAGS.mode == 'eval':
|
||||
batch_size = 100
|
||||
else:
|
||||
batch_size = FLAGS.batch_size
|
||||
|
||||
if FLAGS.dataset == 'cifar10':
|
||||
num_classes = 10
|
||||
elif FLAGS.dataset == 'cifar100':
|
||||
num_classes = 100
|
||||
|
||||
if FLAGS.model == 'resnet20':
|
||||
num_residual_units = 3
|
||||
elif FLAGS.model == 'resnet56':
|
||||
num_residual_units = 9
|
||||
elif FLAGS.model == 'resnet164' and FLAGS.use_bottleneck:
|
||||
num_residual_units = 18
|
||||
elif FLAGS.model == 'resnet164' and not FLAGS.use_bottleneck:
|
||||
num_residual_units = 27
|
||||
else:
|
||||
raise Exception("Invalid model -- only resnet20, resnet56 and resnet164 supported")
|
||||
|
||||
data_format = FLAGS.data_format
|
||||
|
||||
hps = resnet_model.HParams(batch_size=batch_size,
|
||||
num_classes=num_classes,
|
||||
min_lrn_rate=0.0001,
|
||||
lrn_rate=0.1,
|
||||
num_residual_units=num_residual_units,
|
||||
use_bottleneck=FLAGS.use_bottleneck,
|
||||
weight_decay_rate=0.0005,
|
||||
relu_leakiness=0.1,
|
||||
optimizer='mom',
|
||||
data_format=data_format)
|
||||
|
||||
with tf.device(dev):
|
||||
if FLAGS.mode == 'train':
|
||||
train(hps)
|
||||
elif FLAGS.mode == 'eval':
|
||||
evaluate(hps)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.logging.set_verbosity(tf.logging.INFO)
|
||||
tf.app.run()
|
281
tensorflow/CIFAR10/resnet/resnet_model.py
Normal file
|
@ -0,0 +1,281 @@
|
|||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""ResNet model.
|
||||
|
||||
Related papers:
|
||||
https://arxiv.org/pdf/1603.05027v2.pdf
|
||||
https://arxiv.org/pdf/1512.03385v1.pdf
|
||||
https://arxiv.org/pdf/1605.07146v1.pdf
|
||||
"""
|
||||
from collections import namedtuple
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import six
|
||||
|
||||
from tensorflow.python.training import moving_averages
|
||||
|
||||
|
||||
HParams = namedtuple('HParams',
|
||||
'batch_size, num_classes, min_lrn_rate, lrn_rate, '
|
||||
'num_residual_units, use_bottleneck, weight_decay_rate, '
|
||||
'relu_leakiness, optimizer, data_format')
|
||||
|
||||
|
||||
class ResNet(object):
|
||||
"""ResNet model."""
|
||||
|
||||
def __init__(self, hps, images, labels, mode):
|
||||
"""ResNet constructor.
|
||||
|
||||
Args:
|
||||
hps: Hyperparameters.
|
||||
images: Batches of images. [batch_size, image_size, image_size, 3]
|
||||
labels: Batches of labels. [batch_size, num_classes]
|
||||
mode: One of 'train' and 'eval'.
|
||||
"""
|
||||
self.hps = hps
|
||||
self._images = images
|
||||
self.labels = labels
|
||||
self.mode = mode
|
||||
|
||||
self._extra_train_ops = []
|
||||
|
||||
def build_graph(self):
|
||||
"""Build a whole graph for the model."""
|
||||
self.global_step = tf.contrib.framework.get_or_create_global_step()
|
||||
self._build_model()
|
||||
if self.mode == 'train':
|
||||
self._build_train_op()
|
||||
self.summaries = tf.summary.merge_all()
|
||||
|
||||
def _stride_arr(self, stride):
|
||||
"""Map a stride scalar to the stride array for tf.nn.conv2d."""
|
||||
if self.hps.data_format == 'NHWC':
|
||||
return [1, stride, stride, 1]
|
||||
elif self.hps.data_format == 'NCHW':
|
||||
return [1, 1, stride, stride]
|
||||
else:
|
||||
raise Exception("Invalid data_format")
|
||||
|
||||
def _build_model(self):
|
||||
"""Build the core model within the graph."""
|
||||
with tf.variable_scope('init'):
|
||||
x = self._images
|
||||
x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))
|
||||
|
||||
strides = [1, 2, 2]
|
||||
activate_before_residual = [True, False, False]
|
||||
if self.hps.use_bottleneck:
|
||||
res_func = self._bottleneck_residual
|
||||
filters = [16, 64, 128, 256]
|
||||
else:
|
||||
res_func = self._residual
|
||||
filters = [16, 16, 32, 64]
|
||||
# Uncomment the following codes to use w28-10 wide residual network.
|
||||
# It is more memory efficient than very deep residual network and has
|
||||
# comparably good performance.
|
||||
# https://arxiv.org/pdf/1605.07146v1.pdf
|
||||
# filters = [16, 160, 320, 640]
|
||||
# Update hps.num_residual_units to 4
|
||||
|
||||
with tf.variable_scope('unit_1_0'):
|
||||
x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]),
|
||||
activate_before_residual[0])
|
||||
for i in six.moves.range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_1_%d' % i):
|
||||
x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_2_0'):
|
||||
x = res_func(x, filters[1], filters[2], self._stride_arr(strides[1]),
|
||||
activate_before_residual[1])
|
||||
for i in six.moves.range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_2_%d' % i):
|
||||
x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_3_0'):
|
||||
x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
|
||||
activate_before_residual[2])
|
||||
for i in six.moves.range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_3_%d' % i):
|
||||
x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_last'):
|
||||
x = self._batch_norm('final_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._global_avg_pool(x)
|
||||
|
||||
with tf.variable_scope('logit'):
|
||||
logits = self._fully_connected(x, self.hps.num_classes)
|
||||
self.predictions = tf.nn.softmax(logits)
|
||||
|
||||
with tf.variable_scope('costs'):
|
||||
xent = tf.nn.softmax_cross_entropy_with_logits(
|
||||
logits=logits, labels=self.labels)
|
||||
self.cost = tf.reduce_mean(xent, name='xent')
|
||||
self.cost += self._decay()
|
||||
|
||||
tf.summary.scalar('cost', self.cost)
|
||||
|
||||
def _build_train_op(self):
|
||||
"""Build training specific ops for the graph."""
|
||||
self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
|
||||
tf.summary.scalar('learning_rate', self.lrn_rate)
|
||||
|
||||
trainable_variables = tf.trainable_variables()
|
||||
grads = tf.gradients(self.cost, trainable_variables)
|
||||
|
||||
if self.hps.optimizer == 'sgd':
|
||||
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
|
||||
elif self.hps.optimizer == 'mom':
|
||||
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
|
||||
|
||||
apply_op = optimizer.apply_gradients(
|
||||
zip(grads, trainable_variables),
|
||||
global_step=self.global_step, name='train_step')
|
||||
|
||||
train_ops = [apply_op] + self._extra_train_ops
|
||||
self.train_op = tf.group(*train_ops)
|
||||
|
||||
# TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py
|
||||
def _batch_norm(self, name, x):
|
||||
"""Batch normalization."""
|
||||
with tf.variable_scope(name) as scope:
|
||||
output = tf.contrib.layers.batch_norm(x,
|
||||
decay=0.9,
|
||||
epsilon=0.001,
|
||||
data_format=self.hps.data_format,
|
||||
scope=scope,
|
||||
is_training=(self.mode == 'train'),
|
||||
fused=True,
|
||||
updates_collections=None)
|
||||
return output
|
||||
|
||||
def _residual(self, x, in_filter, out_filter, stride,
|
||||
activate_before_residual=False):
|
||||
"""Residual unit with 2 sub layers."""
|
||||
if activate_before_residual:
|
||||
with tf.variable_scope('shared_activation'):
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
orig_x = x
|
||||
else:
|
||||
with tf.variable_scope('residual_only_activation'):
|
||||
orig_x = x
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
|
||||
with tf.variable_scope('sub1'):
|
||||
x = self._conv('conv1', x, 3, in_filter, out_filter, stride)
|
||||
|
||||
with tf.variable_scope('sub2'):
|
||||
x = self._batch_norm('bn2', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1])
|
||||
|
||||
with tf.variable_scope('sub_add'):
|
||||
if in_filter != out_filter:
|
||||
orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID',
|
||||
data_format=self.hps.data_format)
|
||||
if self.hps.data_format == 'NHWC':
|
||||
orig_x = tf.pad(
|
||||
orig_x, [[0, 0], [0, 0], [0, 0],
|
||||
[(out_filter-in_filter)//2, (out_filter-in_filter)//2]])
|
||||
elif self.hps.data_format == 'NCHW':
|
||||
orig_x = tf.pad(
|
||||
orig_x, [[0, 0], [(out_filter-in_filter)//2, (out_filter-in_filter)//2],
|
||||
[0, 0], [0, 0]])
|
||||
x += orig_x
|
||||
|
||||
tf.logging.debug('image after unit %s', x.get_shape())
|
||||
return x
|
||||
|
||||
def _bottleneck_residual(self, x, in_filter, out_filter, stride,
|
||||
activate_before_residual=False):
|
||||
"""Bottleneck residual unit with 3 sub layers."""
|
||||
if activate_before_residual:
|
||||
with tf.variable_scope('common_bn_relu'):
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
orig_x = x
|
||||
else:
|
||||
with tf.variable_scope('residual_bn_relu'):
|
||||
orig_x = x
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
|
||||
with tf.variable_scope('sub1'):
|
||||
x = self._conv('conv1', x, 1, in_filter, out_filter // 4, stride)
|
||||
|
||||
with tf.variable_scope('sub2'):
|
||||
x = self._batch_norm('bn2', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._conv('conv2', x, 3, out_filter // 4, out_filter // 4, [1, 1, 1, 1])
|
||||
|
||||
with tf.variable_scope('sub3'):
|
||||
x = self._batch_norm('bn3', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._conv('conv3', x, 1, out_filter // 4, out_filter, [1, 1, 1, 1])
|
||||
|
||||
with tf.variable_scope('sub_add'):
|
||||
if in_filter != out_filter:
|
||||
orig_x = self._conv('project', orig_x, 1, in_filter, out_filter, stride)
|
||||
x += orig_x
|
||||
|
||||
tf.logging.info('image after unit %s', x.get_shape())
|
||||
return x
|
||||
|
||||
def _decay(self):
|
||||
"""L2 weight decay loss."""
|
||||
costs = []
|
||||
for var in tf.trainable_variables():
|
||||
if var.op.name.find(r'DW') > 0:
|
||||
costs.append(tf.nn.l2_loss(var))
|
||||
# tf.summary.histogram(var.op.name, var)
|
||||
|
||||
return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
|
||||
|
||||
def _conv(self, name, x, filter_size, in_filters, out_filters, strides):
|
||||
"""Convolution."""
|
||||
with tf.variable_scope(name):
|
||||
n = filter_size * filter_size * out_filters
|
||||
kernel = tf.get_variable(
|
||||
'DW', [filter_size, filter_size, in_filters, out_filters],
|
||||
tf.float32, initializer=tf.random_normal_initializer(
|
||||
stddev=np.sqrt(2.0/n)))
|
||||
return tf.nn.conv2d(x, kernel, strides, padding='SAME',
|
||||
data_format=self.hps.data_format)
|
||||
|
||||
def _relu(self, x, leakiness=0.0):
|
||||
"""Relu, with optional leaky support."""
|
||||
return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
|
||||
|
||||
def _fully_connected(self, x, out_dim):
|
||||
"""FullyConnected layer for final output."""
|
||||
x = tf.reshape(x, [self.hps.batch_size, -1])
|
||||
w = tf.get_variable(
|
||||
'DW', [x.get_shape()[1], out_dim],
|
||||
initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
|
||||
b = tf.get_variable('biases', [out_dim],
|
||||
initializer=tf.constant_initializer())
|
||||
return tf.nn.xw_plus_b(x, w, b)
|
||||
|
||||
def _global_avg_pool(self, x):
|
||||
assert x.get_shape().ndims == 4
|
||||
if self.hps.data_format == 'NHWC':
|
||||
return tf.reduce_mean(x, [1, 2])
|
||||
elif self.hps.data_format == 'NCHW':
|
||||
return tf.reduce_mean(x, [2, 3])
|
51
tensorflow/CIFAR10/time_inference.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def main(checkpoint_path, model, use_bottleneck):
|
||||
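# Sweep over batch sizes, invoking resnet_main.py in eval mode and parsing its reported inference time.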
print("Number of images\tInference time")
|
||||
num_trials = 10
|
||||
for batch_size in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]:
|
||||
command = ("python3 resnet/resnet_main.py --mode=eval --eval_data_path=cifar10/test_batch.bin "
|
||||
"--eval_dir=data/%(model)s/log_root/eval --dataset='cifar10' --model=%(model)s "
|
||||
"--use_bottleneck=%(use_bottleneck)s --eval_batch_count=%(num_trials)d --eval_once=True --num_gpus=1 "
|
||||
"--data_format=NHWC --time_inference=True --eval_batch_count=1 --batch_size=%(batch_size)d" %
|
||||
{"model": model, "use_bottleneck": "True" if use_bottleneck else "False", "batch_size": batch_size,
|
||||
"num_trials": num_trials})
|
||||
full_command = command + " --log_root=%s 2>/dev/null" % checkpoint_path
|
||||
try:
|
||||
output = subprocess.check_output(full_command, shell=True)
|
||||
output = output.decode('utf8').strip()
|
||||
for line in output.split('\n'):
|
||||
if "Time for inference" in line:
|
||||
line = line.strip()
|
||||
inference_time = float(line.split(": ")[1]) / num_trials
|
||||
stats = [batch_size, inference_time]
|
||||
print("\t".join([str(stat) for stat in stats]))
|
||||
sys.stdout.flush()
|
||||
except Exception:
|
||||
stats = [batch_size, ""]
|
||||
print("\t".join([str(stat) for stat in stats]))
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description=("Backup model checkpoints periodically")
|
||||
)
|
||||
parser.add_argument('-i', "--checkpoint_path", type=str, required=True,
|
||||
help="Path to dumped model checkpoints")
|
||||
parser.add_argument('-m', "--model", type=str, required=True,
|
||||
help="Model name")
|
||||
parser.add_argument('-b', "--use_bottleneck", action='store_true',
|
||||
help="Use bottleneck")
|
||||
|
||||
cmdline_args = parser.parse_args()
|
||||
opt_dict = vars(cmdline_args)
|
||||
|
||||
checkpoint_path = opt_dict["checkpoint_path"]
|
||||
model = opt_dict["model"]
|
||||
use_bottleneck = opt_dict["use_bottleneck"]
|
||||
|
||||
main(checkpoint_path, model, use_bottleneck)
|
3
tensorflow/SQuAD/.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
out/
|
||||
data/
|
||||
*/__pycache__/
|
165
tensorflow/SQuAD/README.md
Normal file
|
@ -0,0 +1,165 @@
|
|||
# Bi-directional Attention Flow for Machine Comprehension
|
||||
|
||||
- This is the original implementation of [Bi-directional Attention Flow for Machine Comprehension][paper] (Seo et al., 2016).
|
||||
- This is the TensorFlow v1.1.0 compatible version. It is not compatible with previously trained models,
|
||||
so if you want to use them, go to [v0.2.1][v0.2.1].
|
||||
- The CodaLab worksheet for the [SQuAD Leaderboard][squad] submission is available [here][worksheet].
|
||||
- Please contact [Minjoon Seo][minjoon] ([@seominjoon][minjoon-github]) for questions and suggestions.
|
||||
|
||||
## 0. Requirements
|
||||
#### General
|
||||
- Python (developed on 3.5.2. Issues have been reported with Python 2!)
|
||||
- unzip
|
||||
|
||||
#### Python Packages
|
||||
- tensorflow (deep learning library, verified on 1.1.0)
|
||||
- nltk (NLP tools, verified on 3.2.1)
|
||||
- tqdm (progress bar, verified on 4.7.4)
|
||||
- jinja2 (for visualization; not needed if you only train and test)
|
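For example, assuming a standard `pip` setup (swap in `tensorflow-gpu` if you want GPU support), the packages can be installed with something like:
```
pip install tensorflow==1.1.0 nltk==3.2.1 tqdm jinja2
```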
||||
|
||||
## 1. Pre-processing
|
||||
First, prepare the data. Download the SQuAD data, GloVe vectors, and nltk corpus
|
||||
(~850 MB, this will download files to `$HOME/data`):
|
||||
```
|
||||
chmod +x download.sh; ./download.sh
|
||||
```
|
||||
|
||||
Second, preprocess the Stanford QA dataset (along with GloVe vectors) and save the results in `$PWD/data/squad` (~5 minutes):
|
||||
```
|
||||
python -m squad.prepro
|
||||
```
|
||||
|
||||
## 2. Training
|
||||
The model was trained on an NVIDIA Titan X (Pascal architecture, 2016).
|
||||
The model requires at least 12GB of GPU RAM.
|
||||
If your GPU has less than 12GB of RAM, you can either decrease the batch size (performance might degrade)
|
||||
or use multiple GPUs (see below).
|
||||
Training converges at ~18k steps and takes ~4s per step (i.e. ~20 hours in total).
|
||||
|
||||
Before training, it is recommended to first try the following code to verify everything is okay and memory is sufficient:
|
||||
```
|
||||
python -m basic.cli --mode train --noload --debug
|
||||
```
|
||||
|
||||
Then to fully train, run:
|
||||
```
|
||||
python -m basic.cli --mode train --noload
|
||||
```
|
||||
|
||||
You can speed up the training process with optimization flags:
|
||||
```
|
||||
python -m basic.cli --mode train --noload --len_opt --cluster
|
||||
```
|
||||
You can still omit them, but training will be much slower.
|
||||
|
||||
|
||||
## 3. Test
|
||||
To test, run:
|
||||
```
|
||||
python -m basic.cli
|
||||
```
|
||||
|
||||
As with training, you can pass the optimization flags to speed up testing (~5 minutes on dev data):
|
||||
```
|
||||
python -m basic.cli --len_opt --cluster
|
||||
```
|
||||
|
||||
This command loads the most recently saved model during training and begins testing on the test data.
|
||||
After the process ends, it prints F1 and EM scores, and also outputs a json file (`$PWD/out/basic/00/answer/test-####.json`,
|
||||
where `####` is the step at which the model was saved).
|
||||
Note that the printed scores are not official (our scoring scheme is a bit harsher).
|
||||
To obtain the official number, use the official evaluator (copied in `squad` folder) and the output json file:
|
||||
|
||||
```
|
||||
python squad/evaluate-v1.1.py $HOME/data/squad/dev-v1.1.json out/basic/00/answer/test-####.json
|
||||
```
|
||||
|
||||
### 3.1 Loading from pre-trained weights
|
||||
NOTE: this version is not compatible with the following trained models.
|
||||
For compatibility, use [v0.2.1][v0.2.1].
|
||||
|
||||
Instead of training the model yourself, you can choose to use pre-trained weights that were used for [SQuAD Leaderboard][squad] submission.
|
||||
Refer to [this worksheet][worksheet] in CodaLab to reproduce the results.
|
||||
If you are unfamiliar with CodaLab, follow these simple steps (given that you met all prereqs above):
|
||||
|
||||
1. Download `save.zip` from the [worksheet][worksheet] and unzip it in the current directory.
|
||||
2. Copy `glove.6B.100d.txt` from your glove data folder (`$HOME/data/glove/`) to the current directory.
|
||||
3. To reproduce single model:
|
||||
|
||||
```
|
||||
basic/run_single.sh $HOME/data/squad/dev-v1.1.json single.json
|
||||
```
|
||||
|
||||
This writes the answers to `single.json` in the current directory. You can then use the official evaluator to obtain EM and F1 scores (see the example after this list). If you want to run on a GPU (~5 mins), change the value of the batch_size flag in the shell file to a higher number (60 for 12GB of GPU RAM).
|
||||
4. Similarly, to reproduce ensemble method:
|
||||
|
||||
```
|
||||
basic/run_ensemble.sh $HOME/data/squad/dev-v1.1.json ensemble.json
|
||||
```
|
||||
If you want to run on a GPU, either run the script sequentially by removing the '&' in the for loop, or specify a different GPU for each iteration of the loop.
|
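For example, to score the single-model output from step 3 with the official evaluator (same invocation as in section 3):

```
python squad/evaluate-v1.1.py $HOME/data/squad/dev-v1.1.json single.json
```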
||||
|
||||
## Results
|
||||
|
||||
### Dev Data
|
||||
|
||||
| | EM (%) | F1 (%) |
|
||||
| -------- |:------:|:------:|
|
||||
| single | 67.8 | 77.4 |
|
||||
|
||||
### Dev Data (old)
|
||||
NOTE: These numbers are from [v0.2.1][v0.2.1].
|
||||
|
||||
| | EM (%) | F1 (%) |
|
||||
| -------- |:------:|:------:|
|
||||
| single | 67.7 | 77.3 |
|
||||
| ensemble | 72.6 | 80.7 |
|
||||
|
||||
|
||||
### Test Data (old)
|
||||
NOTE: These numbers are from [v0.2.1][v0.2.1].
|
||||
|
||||
| | EM (%) | F1 (%) |
|
||||
| -------- |:------:|:------:|
|
||||
| single | 68.0 | 77.3 |
|
||||
| ensemble | 73.3 | 81.1 |
|
||||
|
||||
Refer to [our paper][paper] for more details.
|
||||
See [SQuAD Leaderboard][squad] to compare with other models.
|
||||
|
||||
|
||||
<!--
|
||||
## Using Pre-trained Model
|
||||
|
||||
If you would like to use pre-trained model, it's very easy!
|
||||
You can download the model weights [here][save] (make sure that its commit id matches the source code's).
|
||||
Extract them and put them in `$PWD/out/basic/00/save` directory, with names unchanged.
|
||||
Then do the testing again, but you need to specify the step # that you are loading from:
|
||||
```
|
||||
python -m basic.cli --mode test --batch_size 8 --eval_num_batches 0 --load_step ####
|
||||
```
|
||||
-->
|
||||
|
||||
|
||||
## Multi-GPU Training & Testing
|
||||
Our model supports multi-GPU training.
|
||||
We follow the parallelization paradigm described in [TensorFlow Tutorial][multi-gpu].
|
||||
In short, if you want to use a batch size of 60 (the default) but you have 3 GPUs with 4GB of RAM each,
|
||||
you initialize each GPU with a batch size of 20 and combine the gradients on the CPU.
|
||||
This can be easily done by running:
|
||||
```
|
||||
python -m basic.cli --mode train --noload --num_gpus 3 --batch_size 20
|
||||
```
|
||||
|
||||
Similarly, you can speed up your testing by:
|
||||
```
|
||||
python -m basic.cli --num_gpus 3 --batch_size 20
|
||||
```
|
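The gradient combination is handled for you when you pass `--num_gpus`; as a rough sketch of the underlying idea only (not this repository's code, and assuming every variable receives a gradient on each tower), averaging per-GPU gradients on the CPU looks roughly like:

```
import tensorflow as tf

def average_tower_gradients(tower_losses, var_list, optimizer):
    """tower_losses: one loss tensor per GPU, built with shared (reused) variables."""
    tower_grads = []
    for gpu_id, loss in enumerate(tower_losses):
        with tf.device('/gpu:%d' % gpu_id):
            # Each tower differentiates its own loss w.r.t. the shared variables.
            tower_grads.append(tf.gradients(loss, var_list))
    with tf.device('/cpu:0'):
        # Average each variable's gradient across towers.
        avg_grads = [tf.reduce_mean(tf.stack(grads), axis=0)
                     for grads in zip(*tower_grads)]
    return optimizer.apply_gradients(list(zip(avg_grads, var_list)))
```

Each effective step therefore still uses the full batch of 60 examples, just split across the 3 GPUs.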
||||
|
||||
|
||||
[multi-gpu]: https://www.tensorflow.org/versions/r0.11/tutorials/deep_cnn/index.html#training-a-model-using-multiple-gpu-cards
|
||||
[squad]: http://stanford-qa.com
|
||||
[paper]: https://arxiv.org/abs/1611.01603
|
||||
[worksheet]: https://worksheets.codalab.org/worksheets/0x37a9b8c44f6845c28866267ef941c89d/
|
||||
[minjoon]: https://seominjoon.github.io
|
||||
[minjoon-github]: https://github.com/seominjoon
|
||||
[v0.2.1]: https://github.com/allenai/bi-att-flow/tree/v0.2.1
|
0
tensorflow/SQuAD/basic/__init__.py
Normal file
112
tensorflow/SQuAD/basic/cli.py
Normal file
|
@ -0,0 +1,112 @@
|
|||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from basic.main import main as m
|
||||
|
||||
flags = tf.app.flags
|
||||
|
||||
# Names and directories
|
||||
flags.DEFINE_string("model_name", "basic", "Model name [basic]")
|
||||
flags.DEFINE_string("data_dir", "data/squad", "Data dir [data/squad]")
|
||||
flags.DEFINE_string("run_id", "0", "Run ID [0]")
|
||||
flags.DEFINE_string("out_base_dir", "out", "out base dir [out]")
|
||||
flags.DEFINE_string("forward_name", "single", "Forward name [single]")
|
||||
flags.DEFINE_string("answer_path", "", "Answer path []")
|
||||
flags.DEFINE_string("eval_path", "", "Eval path []")
|
||||
flags.DEFINE_string("load_path", "", "Load path []")
|
||||
flags.DEFINE_string("shared_path", "", "Shared path []")
|
||||
|
||||
# Device placement
|
||||
flags.DEFINE_string("device", "/cpu:0", "default device for summing gradients. [/cpu:0]")
|
||||
flags.DEFINE_string("device_type", "gpu", "device for computing gradients (parallelization). cpu | gpu [gpu]")
|
||||
flags.DEFINE_integer("num_gpus", 1, "num of gpus or cpus for computing gradients [1]")
|
||||
|
||||
# Essential training and test options
|
||||
flags.DEFINE_string("mode", "test", "trains | test | forward [test]")
|
||||
flags.DEFINE_boolean("load", True, "load saved data? [True]")
|
||||
flags.DEFINE_bool("single", False, "supervise only the answer sentence? [False]")
|
||||
flags.DEFINE_boolean("debug", False, "Debugging mode? [False]")
|
||||
flags.DEFINE_bool('load_ema', True, "load exponential average of variables when testing? [True]")
|
||||
flags.DEFINE_bool("eval", True, "eval? [True]")
|
||||
flags.DEFINE_bool("wy", False, "Use wy for loss / eval? [False]")
|
||||
flags.DEFINE_bool("na", False, "Enable no answer strategy and learn bias? [False]")
|
||||
flags.DEFINE_float("th", 0.5, "Threshold [0.5]")
|
||||
|
||||
# Training / test parameters
|
||||
flags.DEFINE_integer("batch_size", 60, "Batch size [60]")
|
||||
flags.DEFINE_integer("val_num_batches", 100, "validation num batches [100]")
|
||||
flags.DEFINE_integer("test_num_batches", 0, "test num batches [0]")
|
||||
flags.DEFINE_integer("num_epochs", 12, "Total number of epochs for training [12]")
|
||||
flags.DEFINE_integer("num_steps", 20000, "Number of steps [20000]")
|
||||
flags.DEFINE_integer("load_step", 0, "load step [0]")
|
||||
flags.DEFINE_float("init_lr", 0.001, "Initial learning rate [0.001]")
|
||||
flags.DEFINE_float("input_keep_prob", 0.8, "Input keep prob for the dropout of LSTM weights [0.8]")
|
||||
flags.DEFINE_float("keep_prob", 0.8, "Keep prob for the dropout of Char-CNN weights [0.8]")
|
||||
flags.DEFINE_float("wd", 0.0, "L2 weight decay for regularization [0.0]")
|
||||
flags.DEFINE_integer("hidden_size", 100, "Hidden size [100]")
|
||||
flags.DEFINE_integer("char_out_size", 100, "char-level word embedding size [100]")
|
||||
flags.DEFINE_integer("char_emb_size", 8, "Char emb size [8]")
|
||||
flags.DEFINE_string("out_channel_dims", "100", "Out channel dims of Char-CNN, separated by commas [100]")
|
||||
flags.DEFINE_string("filter_heights", "5", "Filter heights of Char-CNN, separated by commas [5]")
|
||||
flags.DEFINE_bool("finetune", False, "Finetune word embeddings? [False]")
|
||||
flags.DEFINE_bool("highway", True, "Use highway? [True]")
|
||||
flags.DEFINE_integer("highway_num_layers", 2, "highway num layers [2]")
|
||||
flags.DEFINE_bool("share_cnn_weights", True, "Share Char-CNN weights [True]")
|
||||
flags.DEFINE_bool("share_lstm_weights", True, "Share pre-processing (phrase-level) LSTM weights [True]")
|
||||
flags.DEFINE_float("var_decay", 0.999, "Exponential moving average decay for variables [0.999]")
|
||||
|
||||
# Optimizations
|
||||
flags.DEFINE_bool("cluster", False, "Cluster data for faster training [False]")
|
||||
flags.DEFINE_bool("len_opt", False, "Length optimization? [False]")
|
||||
flags.DEFINE_bool("cpu_opt", False, "CPU optimization? GPU computation can be slower [False]")
|
||||
|
||||
# Logging and saving options
|
||||
flags.DEFINE_boolean("progress", True, "Show progress? [True]")
|
||||
flags.DEFINE_integer("log_period", 100, "Log period [100]")
|
||||
flags.DEFINE_integer("eval_period", 1000, "Eval period [1000]")
|
||||
flags.DEFINE_integer("save_period", 1000, "Save Period [1000]")
|
||||
flags.DEFINE_integer("max_to_keep", 20, "Max recent saves to keep [20]")
|
||||
flags.DEFINE_bool("dump_eval", True, "dump eval? [True]")
|
||||
flags.DEFINE_bool("dump_answer", True, "dump answer? [True]")
|
||||
flags.DEFINE_bool("vis", False, "output visualization numbers? [False]")
|
||||
flags.DEFINE_bool("dump_pickle", True, "Dump pickle instead of json? [True]")
|
||||
flags.DEFINE_float("decay", 0.9, "Exponential moving average decay for logging values [0.9]")
|
||||
|
||||
# Thresholds for speed and less memory usage
|
||||
flags.DEFINE_integer("word_count_th", 10, "word count th [100]")
|
||||
flags.DEFINE_integer("char_count_th", 50, "char count th [500]")
|
||||
flags.DEFINE_integer("sent_size_th", 400, "sent size th [64]")
|
||||
flags.DEFINE_integer("num_sents_th", 8, "num sents th [8]")
|
||||
flags.DEFINE_integer("ques_size_th", 30, "ques size th [32]")
|
||||
flags.DEFINE_integer("word_size_th", 16, "word size th [16]")
|
||||
flags.DEFINE_integer("para_size_th", 256, "para size th [256]")
|
||||
|
||||
# Advanced training options
|
||||
flags.DEFINE_bool("lower_word", True, "lower word [True]")
|
||||
flags.DEFINE_bool("squash", False, "squash the sentences into one? [False]")
|
||||
flags.DEFINE_bool("swap_memory", True, "swap memory? [True]")
|
||||
flags.DEFINE_string("data_filter", "max", "max | valid | semi [max]")
|
||||
flags.DEFINE_bool("use_glove_for_unk", True, "use glove for unk [False]")
|
||||
flags.DEFINE_bool("known_if_glove", True, "consider as known if present in glove [False]")
|
||||
flags.DEFINE_string("logit_func", "tri_linear", "logit func [tri_linear]")
|
||||
flags.DEFINE_string("answer_func", "linear", "answer logit func [linear]")
|
||||
flags.DEFINE_string("sh_logit_func", "tri_linear", "sh logit func [tri_linear]")
|
||||
|
||||
# Ablation options
|
||||
flags.DEFINE_bool("use_char_emb", True, "use char emb? [True]")
|
||||
flags.DEFINE_bool("use_word_emb", True, "use word embedding? [True]")
|
||||
flags.DEFINE_bool("q2c_att", True, "question-to-context attention? [True]")
|
||||
flags.DEFINE_bool("c2q_att", True, "context-to-question attention? [True]")
|
||||
flags.DEFINE_bool("dynamic_att", False, "Dynamic attention [False]")
|
||||
|
||||
|
||||
def main(_):
|
||||
config = flags.FLAGS
|
||||
|
||||
config.out_dir = os.path.join(config.out_base_dir, config.model_name, str(config.run_id).zfill(2))
|
||||
|
||||
m(config)
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.app.run()
|
116
tensorflow/SQuAD/basic/ensemble.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
import argparse
|
||||
import functools
|
||||
import gzip
|
||||
import json
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from operator import mul
|
||||
|
||||
from tqdm import tqdm
|
||||
from squad.utils import get_phrase, get_best_span, get_span_score_pairs
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('paths', nargs='+')
|
||||
parser.add_argument('-o', '--out', default='ensemble.json')
|
||||
parser.add_argument("--data_path", default="data/squad/data_test.json")
|
||||
parser.add_argument("--shared_path", default="data/squad/shared_test.json")
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def ensemble(args):
|
||||
e_list = []
|
||||
for path in tqdm(args.paths):
|
||||
with gzip.open(path, 'r') as fh:
|
||||
e = pickle.load(fh)
|
||||
e_list.append(e)
|
||||
|
||||
with open(args.data_path, 'r') as fh:
|
||||
data = json.load(fh)
|
||||
|
||||
with open(args.shared_path, 'r') as fh:
|
||||
shared = json.load(fh)
|
||||
|
||||
out = {}
|
||||
for idx, (id_, rx) in tqdm(enumerate(zip(data['ids'], data['*x'])), total=len(e['yp'])):
|
||||
if idx >= len(e['yp']):
|
||||
# for debugging purpose
|
||||
break
|
||||
context = shared['p'][rx[0]][rx[1]]
|
||||
wordss = shared['x'][rx[0]][rx[1]]
|
||||
yp_list = [e['yp'][idx] for e in e_list]
|
||||
yp2_list = [e['yp2'][idx] for e in e_list]
|
||||
answer = ensemble4(context, wordss, yp_list, yp2_list)
|
||||
out[id_] = answer
|
||||
|
||||
with open(args.out, 'w') as fh:
|
||||
json.dump(out, fh)
|
||||
|
||||
|
||||
def ensemble1(context, wordss, y1_list, y2_list):
|
||||
"""
|
||||
|
||||
:param context: Original context
|
||||
:param wordss: tokenized words (nested 2D list)
|
||||
:param y1_list: list of start index probs (each element corresponds to probs from a single model)
|
||||
:param y2_list: list of stop index probs
|
||||
:return:
|
||||
"""
|
||||
sum_y1 = combine_y_list(y1_list)
|
||||
sum_y2 = combine_y_list(y2_list)
|
||||
span, score = get_best_span(sum_y1, sum_y2)
|
||||
return get_phrase(context, wordss, span)
|
||||
|
||||
|
||||
def ensemble2(context, wordss, y1_list, y2_list):
|
||||
start_dict = defaultdict(float)
|
||||
stop_dict = defaultdict(float)
|
||||
for y1, y2 in zip(y1_list, y2_list):
|
||||
span, score = get_best_span(y1, y2)
|
||||
start_dict[span[0]] += y1[span[0][0]][span[0][1]]
|
||||
stop_dict[span[1]] += y2[span[1][0]][span[1][1]]
|
||||
start = max(start_dict.items(), key=lambda pair: pair[1])[0]
|
||||
stop = max(stop_dict.items(), key=lambda pair: pair[1])[0]
|
||||
best_span = (start, stop)
|
||||
return get_phrase(context, wordss, best_span)
|
||||
|
||||
|
||||
def ensemble3(context, wordss, y1_list, y2_list):
|
||||
d = defaultdict(float)
|
||||
for y1, y2 in zip(y1_list, y2_list):
|
||||
span, score = get_best_span(y1, y2)
|
||||
phrase = get_phrase(context, wordss, span)
|
||||
d[phrase] += score
|
||||
return max(d.items(), key=lambda pair: pair[1])[0]
|
||||
|
||||
|
||||
def ensemble4(context, wordss, y1_list, y2_list):
|
||||
d = defaultdict(lambda: 0.0)
|
||||
for y1, y2 in zip(y1_list, y2_list):
|
||||
for span, score in get_span_score_pairs(y1, y2):
|
||||
d[span] += score
|
||||
span = max(d.items(), key=lambda pair: pair[1])[0]
|
||||
phrase = get_phrase(context, wordss, span)
|
||||
return phrase
|
||||
|
||||
|
||||
def combine_y_list(y_list, op='*'):
|
||||
if op == '+':
|
||||
func = sum
|
||||
elif op == '*':
|
||||
def func(l): return functools.reduce(mul, l)
|
||||
else:
|
||||
func = op
|
||||
return [[func(yij_list) for yij_list in zip(*yi_list)] for yi_list in zip(*y_list)]
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
ensemble(args)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
39
tensorflow/SQuAD/basic/ensemble_fast.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
import sys
|
||||
import json
|
||||
from collections import Counter, defaultdict
|
||||
import re
|
||||
|
||||
def key_func(pair):
|
||||
return pair[1]
|
||||
|
||||
|
||||
def get_func(vals, probs):
|
||||
counter = Counter(vals)
|
||||
# return max(zip(vals, probs), key=lambda pair: pair[1])[0]
|
||||
# return max(zip(vals, probs), key=lambda pair: pair[1] * counter[pair[0]] / len(counter) - 999 * (len(pair[0]) == 0) )[0]
|
||||
# return max(zip(vals, probs), key=lambda pair: pair[1] + 0.7 * counter[pair[0]] / len(counter) - 999 * (len(pair[0]) == 0) )[0]
|
||||
d = defaultdict(float)
|
||||
for val, prob in zip(vals, probs):
|
||||
d[val] += prob
|
||||
d[''] = 0
|
||||
return max(d.items(), key=lambda pair: pair[1])[0]
|
||||
|
||||
third_path = sys.argv[1]
|
||||
other_paths = sys.argv[2:]
|
||||
|
||||
others = [json.load(open(path, 'r')) for path in other_paths]
|
||||
|
||||
|
||||
c = {}
|
||||
|
||||
assert min(map(len, others)) == max(map(len, others)), list(map(len, others))
|
||||
|
||||
for key in others[0].keys():
|
||||
if key == 'scores':
|
||||
continue
|
||||
probs = [other['scores'][key] for other in others]
|
||||
vals = [other[key] for other in others]
|
||||
largest_val = get_func(vals, probs)
|
||||
c[key] = largest_val
|
||||
|
||||
json.dump(c, open(third_path, 'w'))
|
453
tensorflow/SQuAD/basic/evaluator.py
Normal file
|
@ -0,0 +1,453 @@
|
|||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from basic.read_data import DataSet
|
||||
from my.nltk_utils import span_f1
|
||||
from my.tensorflow import padded_reshape
|
||||
from my.utils import argmax
|
||||
from squad.utils import get_phrase, get_best_span, get_best_span_wy
|
||||
|
||||
|
||||
class Evaluation(object):
|
||||
def __init__(self, data_type, global_step, idxs, yp, tensor_dict=None):
|
||||
self.data_type = data_type
|
||||
self.global_step = global_step
|
||||
self.idxs = idxs
|
||||
self.yp = yp
|
||||
self.num_examples = len(yp)
|
||||
self.tensor_dict = None
|
||||
self.dict = {'data_type': data_type,
|
||||
'global_step': global_step,
|
||||
'yp': yp,
|
||||
'idxs': idxs,
|
||||
'num_examples': self.num_examples}
|
||||
if tensor_dict is not None:
|
||||
self.tensor_dict = {key: val.tolist() for key, val in tensor_dict.items()}
|
||||
for key, val in self.tensor_dict.items():
|
||||
self.dict[key] = val
|
||||
self.summaries = None
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}".format(self.data_type, self.global_step)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_yp = self.yp + other.yp
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_tensor_dict = None
|
||||
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: val + other.tensor_dict[key] for key, val in self.tensor_dict.items()}
|
||||
return Evaluation(self.data_type, self.global_step, new_idxs, new_yp, tensor_dict=new_tensor_dict)
|
||||
|
||||
def __radd__(self, other):
|
||||
return self.__add__(other)
|
||||
|
||||
|
||||
class LabeledEvaluation(Evaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, y, tensor_dict=None):
|
||||
super(LabeledEvaluation, self).__init__(data_type, global_step, idxs, yp, tensor_dict=tensor_dict)
|
||||
self.y = y
|
||||
self.dict['y'] = y
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_yp = self.yp + other.yp
|
||||
new_y = self.y + other.y
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_tensor_dict = None
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: np.concatenate((val, other.tensor_dict[key]), axis=0) for key, val in self.tensor_dict.items()}
|
||||
return LabeledEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y, tensor_dict=new_tensor_dict)
|
||||
|
||||
|
||||
class AccuracyEvaluation(LabeledEvaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, y, correct, loss, tensor_dict=None):
|
||||
super(AccuracyEvaluation, self).__init__(data_type, global_step, idxs, yp, y, tensor_dict=tensor_dict)
|
||||
self.loss = loss
|
||||
self.correct = correct
|
||||
self.acc = sum(correct) / len(correct)
|
||||
self.dict['loss'] = loss
|
||||
self.dict['correct'] = correct
|
||||
self.dict['acc'] = self.acc
|
||||
loss_summary = tf.Summary(value=[tf.Summary.Value(tag='{}/loss'.format(data_type), simple_value=self.loss)])
|
||||
acc_summary = tf.Summary(value=[tf.Summary.Value(tag='{}/acc'.format(data_type), simple_value=self.acc)])
|
||||
self.summaries = [loss_summary, acc_summary]
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: accuracy={}, loss={}".format(self.data_type, self.global_step, self.acc, self.loss)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_y = self.y + other.y
|
||||
new_correct = self.correct + other.correct
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
|
||||
new_tensor_dict = None
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: np.concatenate((val, other.tensor_dict[key]), axis=0) for key, val in self.tensor_dict.items()}
|
||||
return AccuracyEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y, new_correct, new_loss, tensor_dict=new_tensor_dict)
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.global_step = model.global_step
|
||||
self.yp = model.yp
|
||||
self.tensor_dict = {} if tensor_dict is None else tensor_dict
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
|
||||
global_step, yp, vals = sess.run([self.global_step, self.yp, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
def get_evaluation_from_batches(self, sess, batches):
|
||||
e = sum(self.get_evaluation(sess, batch) for batch in batches)
|
||||
return e
|
||||
|
||||
|
||||
class LabeledEvaluator(Evaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(LabeledEvaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.y = model.y
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
|
||||
global_step, yp, vals = sess.run([self.global_step, self.yp, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
y = feed_dict[self.y]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = LabeledEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y.tolist(), tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
|
||||
class AccuracyEvaluator(LabeledEvaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(AccuracyEvaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.loss = model.loss
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self.model.get_feed_dict(data_set, False)
|
||||
global_step, yp, loss, vals = sess.run([self.global_step, self.yp, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
y = data_set.data['y']
|
||||
yp = yp[:data_set.num_examples]
|
||||
correct = [self.__class__.compare(yi, ypi) for yi, ypi in zip(y, yp)]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = AccuracyEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y, correct, float(loss), tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, ypi):
|
||||
for start, stop in yi:
|
||||
if start == int(np.argmax(ypi)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class AccuracyEvaluator2(AccuracyEvaluator):
|
||||
@staticmethod
|
||||
def compare(yi, ypi):
|
||||
for start, stop in yi:
|
||||
para_start = int(np.argmax(np.max(ypi, 1)))
|
||||
sent_start = int(np.argmax(ypi[para_start]))
|
||||
if tuple(start) == (para_start, sent_start):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class ForwardEvaluation(Evaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, yp2, loss, id2answer_dict, tensor_dict=None):
|
||||
super(ForwardEvaluation, self).__init__(data_type, global_step, idxs, yp, tensor_dict=tensor_dict)
|
||||
self.yp2 = yp2
|
||||
self.loss = loss
|
||||
self.dict['loss'] = loss
|
||||
self.dict['yp2'] = yp2
|
||||
self.id2answer_dict = id2answer_dict
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_yp2 = self.yp2 + other.yp2
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_yp)
|
||||
new_id2answer_dict = dict(list(self.id2answer_dict.items()) + list(other.id2answer_dict.items()))
|
||||
new_id2score_dict = dict(list(self.id2answer_dict['scores'].items()) + list(other.id2answer_dict['scores'].items()))
|
||||
new_id2answer_dict['scores'] = new_id2score_dict
|
||||
new_tensor_dict = None
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: np.concatenate((val, other.tensor_dict[key]), axis=0) for key, val in self.tensor_dict.items()}
|
||||
return ForwardEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_yp2, new_loss, new_id2answer_dict, tensor_dict=new_tensor_dict)
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: loss={:.4f}".format(self.data_type, self.global_step, self.loss)
|
||||
|
||||
|
||||
class F1Evaluation(AccuracyEvaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, yp2, y, correct, loss, f1s, id2answer_dict, tensor_dict=None):
|
||||
super(F1Evaluation, self).__init__(data_type, global_step, idxs, yp, y, correct, loss, tensor_dict=tensor_dict)
|
||||
self.yp2 = yp2
|
||||
self.f1s = f1s
|
||||
self.f1 = float(np.mean(f1s))
|
||||
self.dict['yp2'] = yp2
|
||||
self.dict['f1s'] = f1s
|
||||
self.dict['f1'] = self.f1
|
||||
self.id2answer_dict = id2answer_dict
|
||||
f1_summary = tf.Summary(value=[tf.Summary.Value(tag='{}/f1'.format(data_type), simple_value=self.f1)])
|
||||
self.summaries.append(f1_summary)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_yp2 = self.yp2 + other.yp2
|
||||
new_y = self.y + other.y
|
||||
new_correct = self.correct + other.correct
|
||||
new_f1s = self.f1s + other.f1s
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
|
||||
new_id2answer_dict = dict(list(self.id2answer_dict.items()) + list(other.id2answer_dict.items()))
|
||||
new_id2score_dict = dict(list(self.id2answer_dict['scores'].items()) + list(other.id2answer_dict['scores'].items()))
|
||||
new_id2answer_dict['scores'] = new_id2score_dict
|
||||
if 'na' in self.id2answer_dict:
|
||||
new_id2na_dict = dict(list(self.id2answer_dict['na'].items()) + list(other.id2answer_dict['na'].items()))
|
||||
new_id2answer_dict['na'] = new_id2na_dict
|
||||
e = F1Evaluation(self.data_type, self.global_step, new_idxs, new_yp, new_yp2, new_y, new_correct, new_loss, new_f1s, new_id2answer_dict)
|
||||
if 'wyp' in self.dict:
|
||||
new_wyp = self.dict['wyp'] + other.dict['wyp']
|
||||
e.dict['wyp'] = new_wyp
|
||||
return e
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: accuracy={:.4f}, f1={:.4f}, loss={:.4f}".format(self.data_type, self.global_step, self.acc, self.f1, self.loss)
|
||||
|
||||
|
||||
class F1Evaluator(LabeledEvaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(F1Evaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.yp2 = model.yp2
|
||||
self.wyp = model.wyp
|
||||
self.loss = model.loss
|
||||
if config.na:
|
||||
self.na = model.na_prob
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = self._split_batch(batch)
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self._get_feed_dict(batch)
|
||||
if self.config.na:
|
||||
global_step, yp, yp2, wyp, loss, na, vals = sess.run([self.global_step, self.yp, self.yp2, self.wyp, self.loss, self.na, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
else:
|
||||
global_step, yp, yp2, wyp, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.wyp, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
y = data_set.data['y']
|
||||
if self.config.squash:
|
||||
new_y = []
|
||||
for xi, yi in zip(data_set.data['x'], y):
|
||||
new_yi = []
|
||||
for start, stop in yi:
|
||||
start_offset = sum(map(len, xi[:start[0]]))
|
||||
stop_offset = sum(map(len, xi[:stop[0]]))
|
||||
new_start = 0, start_offset + start[1]
|
||||
new_stop = 0, stop_offset + stop[1]
|
||||
new_yi.append((new_start, new_stop))
|
||||
new_y.append(new_yi)
|
||||
y = new_y
|
||||
if self.config.single:
|
||||
new_y = []
|
||||
for yi in y:
|
||||
new_yi = []
|
||||
for start, stop in yi:
|
||||
new_start = 0, start[1]
|
||||
new_stop = 0, stop[1]
|
||||
new_yi.append((new_start, new_stop))
|
||||
new_y.append(new_yi)
|
||||
y = new_y
|
||||
|
||||
yp, yp2, wyp = yp[:data_set.num_examples], yp2[:data_set.num_examples], wyp[:data_set.num_examples]
|
||||
if self.config.wy:
|
||||
spans, scores = zip(*[get_best_span_wy(wypi, self.config.th) for wypi in wyp])
|
||||
else:
|
||||
spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])
|
||||
|
||||
def _get(xi, span):
|
||||
if len(xi) <= span[0][0]:
|
||||
return [""]
|
||||
if len(xi[span[0][0]]) <= span[1][1]:
|
||||
return [""]
|
||||
return xi[span[0][0]][span[0][1]:span[1][1]]
|
||||
|
||||
def _get2(context, xi, span):
|
||||
if len(xi) <= span[0][0]:
|
||||
return ""
|
||||
if len(xi[span[0][0]]) <= span[1][1]:
|
||||
return ""
|
||||
return get_phrase(context, xi, span)
|
||||
|
||||
id2answer_dict = {id_: _get2(context, xi, span)
|
||||
for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
|
||||
id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
|
||||
id2answer_dict['scores'] = id2score_dict
|
||||
if self.config.na:
|
||||
id2na_dict = {id_: float(each) for id_, each in zip(data_set.data['ids'], na)}
|
||||
id2answer_dict['na'] = id2na_dict
|
||||
correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
|
||||
f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y,
|
||||
correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
|
||||
if self.config.wy:
|
||||
e.dict['wyp'] = wyp.tolist()
|
||||
return e
|
||||
|
||||
def _split_batch(self, batch):
|
||||
return batch
|
||||
|
||||
def _get_feed_dict(self, batch):
|
||||
return self.model.get_feed_dict(batch[1], False)
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, ypi, yp2i):
|
||||
for start, stop in yi:
|
||||
aypi = argmax(ypi)
|
||||
mask = np.zeros(yp2i.shape)
|
||||
mask[aypi[0], aypi[1]:] = np.ones([yp2i.shape[1] - aypi[1]])
|
||||
if tuple(start) == aypi and (stop[0], stop[1]-1) == argmax(yp2i * mask):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def compare2(yi, span):
|
||||
for start, stop in yi:
|
||||
if tuple(start) == span[0] and tuple(stop) == span[1]:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def span_f1(yi, span):
|
||||
max_f1 = 0
|
||||
for start, stop in yi:
|
||||
if start[0] == span[0][0]:
|
||||
true_span = start[1], stop[1]
|
||||
pred_span = span[0][1], span[1][1]
|
||||
f1 = span_f1(true_span, pred_span)
|
||||
max_f1 = max(f1, max_f1)
|
||||
return max_f1
|
||||
|
||||
|
||||
class MultiGPUF1Evaluator(F1Evaluator):
|
||||
def __init__(self, config, models, tensor_dict=None):
|
||||
super(MultiGPUF1Evaluator, self).__init__(config, models[0], tensor_dict=tensor_dict)
|
||||
self.models = models
|
||||
with tf.name_scope("eval_concat"):
|
||||
N, M, JX = config.batch_size, config.max_num_sents, config.max_sent_size
|
||||
self.yp = tf.concat(axis=0, values=[padded_reshape(model.yp, [N, M, JX]) for model in models])
|
||||
self.yp2 = tf.concat(axis=0, values=[padded_reshape(model.yp2, [N, M, JX]) for model in models])
|
||||
self.wy = tf.concat(axis=0, values=[padded_reshape(model.wy, [N, M, JX]) for model in models])
|
||||
self.loss = tf.add_n([model.loss for model in models])/len(models)
|
||||
|
||||
def _split_batch(self, batches):
|
||||
idxs_list, data_sets = zip(*batches)
|
||||
idxs = sum(idxs_list, ())
|
||||
data_set = sum(data_sets, data_sets[0].get_empty())
|
||||
return idxs, data_set
|
||||
|
||||
def _get_feed_dict(self, batches):
|
||||
feed_dict = {}
|
||||
for model, (_, data_set) in zip(self.models, batches):
|
||||
feed_dict.update(model.get_feed_dict(data_set, False))
|
||||
return feed_dict
|
||||
|
||||
|
||||
class ForwardEvaluator(Evaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(ForwardEvaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.yp2 = model.yp2
|
||||
self.loss = model.loss
|
||||
if config.na:
|
||||
self.na = model.na_prob
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self.model.get_feed_dict(data_set, False)
|
||||
if self.config.na:
|
||||
global_step, yp, yp2, loss, na, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, self.na, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
else:
|
||||
global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
|
||||
yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
|
||||
spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])
|
||||
|
||||
def _get(xi, span):
|
||||
if len(xi) <= span[0][0]:
|
||||
return [""]
|
||||
if len(xi[span[0][0]]) <= span[1][1]:
|
||||
return [""]
|
||||
return xi[span[0][0]][span[0][1]:span[1][1]]
|
||||
|
||||
def _get2(context, xi, span):
|
||||
if len(xi) <= span[0][0]:
|
||||
return ""
|
||||
if len(xi[span[0][0]]) <= span[1][1]:
|
||||
return ""
|
||||
return get_phrase(context, xi, span)
|
||||
|
||||
id2answer_dict = {id_: _get2(context, xi, span)
|
||||
for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
|
||||
id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
|
||||
id2answer_dict['scores'] = id2score_dict
|
||||
if self.config.na:
|
||||
id2na_dict = {id_: float(each) for id_, each in zip(data_set.data['ids'], na)}
|
||||
id2answer_dict['na'] = id2na_dict
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = ForwardEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), float(loss), id2answer_dict, tensor_dict=tensor_dict)
|
||||
# TODO : wy support
|
||||
return e
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, ypi, yp2i):
|
||||
for start, stop in yi:
|
||||
aypi = argmax(ypi)
|
||||
mask = np.zeros(yp2i.shape)
|
||||
mask[aypi[0], aypi[1]:] = np.ones([yp2i.shape[1] - aypi[1]])
|
||||
if tuple(start) == aypi and (stop[0], stop[1]-1) == argmax(yp2i * mask):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def compare2(yi, span):
|
||||
for start, stop in yi:
|
||||
if tuple(start) == span[0] and tuple(stop) == span[1]:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def span_f1(yi, span):
|
||||
max_f1 = 0
|
||||
for start, stop in yi:
|
||||
if start[0] == span[0][0]:
|
||||
true_span = start[1], stop[1]
|
||||
pred_span = span[0][1], span[1][1]
|
||||
f1 = span_f1(true_span, pred_span)
|
||||
max_f1 = max(f1, max_f1)
|
||||
return max_f1
|
||||
|
||||
|
35
tensorflow/SQuAD/basic/get_pr.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
import json
|
||||
import argparse
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("path")
|
||||
parser.add_argument("-t", "--th", type=float, default=0.5)
|
||||
# TODO : put more args here
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def get_pr(args):
|
||||
with open(args.path, 'r') as fp:
|
||||
answers = json.load(fp)
|
||||
|
||||
na = answers['na']
|
||||
|
||||
tp = sum(int(not id_.startswith("neg") and score < args.th) for id_, score in na.items())
|
||||
fp = sum(int(id_.startswith("neg") and score < args.th) for id_, score in na.items())
|
||||
tn = sum(int(id_.startswith("neg") and score >= args.th) for id_, score in na.items())
|
||||
fn = sum(int(not id_.startswith("neg") and score >= args.th) for id_, score in na.items())
|
||||
|
||||
p = tp / (tp + fp)
|
||||
r = tp / (tp + fn)
|
||||
print("p={:.3f}, r={:.3f}".format(p, r))
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
get_pr(args)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
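The script above turns the dumped 'na' scores into a precision/recall figure for the answer-exists decision: ids prefixed with "neg" are treated as unanswerable, and a score below the threshold means the model predicts an answer. A toy illustration of that arithmetic (the scores here are invented for the example):
na = {"q1": 0.1, "q2": 0.7, "neg1": 0.9, "neg2": 0.2}   # invented na scores
th = 0.5
tp = sum(not i.startswith("neg") and s < th for i, s in na.items())   # 1 (q1)
fp = sum(i.startswith("neg") and s < th for i, s in na.items())       # 1 (neg2)
fn = sum(not i.startswith("neg") and s >= th for i, s in na.items())  # 1 (q2)
print(tp / (tp + fp), tp / (tp + fn))   # precision 0.5, recall 0.5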
79
tensorflow/SQuAD/basic/graph_handler.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
import gzip
|
||||
import json
|
||||
from json import encoder
|
||||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from basic.evaluator import Evaluation, F1Evaluation
|
||||
from my.utils import short_floats
|
||||
|
||||
import pickle
|
||||
|
||||
|
||||
class GraphHandler(object):
|
||||
def __init__(self, config, model):
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.saver = tf.train.Saver(max_to_keep=config.max_to_keep)
|
||||
self.writer = None
|
||||
self.save_path = os.path.join(config.save_dir, config.model_name)
|
||||
|
||||
def initialize(self, sess):
|
||||
sess.run(tf.global_variables_initializer())
|
||||
if self.config.load:
|
||||
self._load(sess)
|
||||
|
||||
if self.config.mode == 'train':
|
||||
self.writer = tf.summary.FileWriter(self.config.log_dir, graph=tf.get_default_graph())
|
||||
|
||||
def save(self, sess, global_step=None):
|
||||
saver = tf.train.Saver(max_to_keep=self.config.max_to_keep)
|
||||
saver.save(sess, self.save_path, global_step=global_step)
|
||||
|
||||
def _load(self, sess):
|
||||
config = self.config
|
||||
vars_ = {var.name.split(":")[0]: var for var in tf.global_variables()}
|
||||
if config.load_ema:
|
||||
ema = self.model.var_ema
|
||||
for var in tf.trainable_variables():
|
||||
del vars_[var.name.split(":")[0]]
|
||||
vars_[ema.average_name(var)] = var
|
||||
saver = tf.train.Saver(vars_, max_to_keep=config.max_to_keep)
|
||||
|
||||
if config.load_path:
|
||||
save_path = config.load_path
|
||||
elif config.load_step > 0:
|
||||
save_path = os.path.join(config.save_dir, "{}-{}".format(config.model_name, config.load_step))
|
||||
else:
|
||||
save_dir = config.save_dir
|
||||
checkpoint = tf.train.get_checkpoint_state(save_dir)
|
||||
assert checkpoint is not None, "cannot load checkpoint at {}".format(save_dir)
|
||||
save_path = checkpoint.model_checkpoint_path
|
||||
print("Loading saved model from {}".format(save_path))
|
||||
saver.restore(sess, save_path)
|
||||
|
||||
def add_summary(self, summary, global_step):
|
||||
self.writer.add_summary(summary, global_step)
|
||||
|
||||
def add_summaries(self, summaries, global_step):
|
||||
for summary in summaries:
|
||||
self.add_summary(summary, global_step)
|
||||
|
||||
def dump_eval(self, e, precision=2, path=None):
|
||||
assert isinstance(e, Evaluation)
|
||||
if self.config.dump_pickle:
|
||||
path = path or os.path.join(self.config.eval_dir, "{}-{}.pklz".format(e.data_type, str(e.global_step).zfill(6)))
|
||||
with gzip.open(path, 'wb', compresslevel=3) as fh:
|
||||
pickle.dump(e.dict, fh)
|
||||
else:
|
||||
path = path or os.path.join(self.config.eval_dir, "{}-{}.json".format(e.data_type, str(e.global_step).zfill(6)))
|
||||
with open(path, 'w') as fh:
|
||||
json.dump(short_floats(e.dict, precision), fh)
|
||||
|
||||
def dump_answer(self, e, path=None):
|
||||
assert isinstance(e, Evaluation)
|
||||
path = path or os.path.join(self.config.answer_dir, "{}-{}.json".format(e.data_type, str(e.global_step).zfill(6)))
|
||||
with open(path, 'w') as fh:
|
||||
json.dump(e.id2answer_dict, fh)
|
||||
|
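One non-obvious detail in graph_handler.py above: when config.load_ema is set, each trainable variable is restored from its exponential-moving-average shadow rather than its raw trained value, by renaming the checkpoint entry it maps to. A sketch of the resulting restore map for a single weight W (shadow naming assumed to follow TF's ExponentialMovingAverage default):
# before remapping:  {"W": <live variable W>}
# after remapping:   {"W/ExponentialMovingAverage": <live variable W>}
# so saver.restore() writes the smoothed checkpoint value into the live W.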
233
tensorflow/SQuAD/basic/main.py
Normal file
|
@ -0,0 +1,233 @@
|
|||
import argparse
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
from pprint import pprint
|
||||
|
||||
import tensorflow as tf
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
|
||||
from basic.evaluator import ForwardEvaluator, MultiGPUF1Evaluator
|
||||
from basic.graph_handler import GraphHandler
|
||||
from basic.model import get_multi_gpu_models
|
||||
from basic.trainer import MultiGPUTrainer
|
||||
from basic.read_data import read_data, get_squad_data_filter, update_config
|
||||
from my.tensorflow import get_num_params
|
||||
|
||||
|
||||
def main(config):
|
||||
set_dirs(config)
|
||||
with tf.device(config.device):
|
||||
if config.mode == 'train':
|
||||
_train(config)
|
||||
elif config.mode == 'test':
|
||||
_test(config)
|
||||
elif config.mode == 'forward':
|
||||
_forward(config)
|
||||
else:
|
||||
raise ValueError("invalid value for 'mode': {}".format(config.mode))
|
||||
|
||||
|
||||
def set_dirs(config):
|
||||
# create directories
|
||||
assert config.load or config.mode == 'train', "config.load must be True if not training"
|
||||
if not config.load and os.path.exists(config.out_dir):
|
||||
shutil.rmtree(config.out_dir)
|
||||
|
||||
config.save_dir = os.path.join(config.out_dir, "save")
|
||||
config.log_dir = os.path.join(config.out_dir, "log")
|
||||
config.eval_dir = os.path.join(config.out_dir, "eval")
|
||||
config.answer_dir = os.path.join(config.out_dir, "answer")
|
||||
if not os.path.exists(config.out_dir):
|
||||
os.makedirs(config.out_dir)
|
||||
if not os.path.exists(config.save_dir):
|
||||
os.mkdir(config.save_dir)
|
||||
if not os.path.exists(config.log_dir):
|
||||
os.mkdir(config.log_dir)
|
||||
if not os.path.exists(config.answer_dir):
|
||||
os.mkdir(config.answer_dir)
|
||||
if not os.path.exists(config.eval_dir):
|
||||
os.mkdir(config.eval_dir)
|
||||
|
||||
|
||||
def _config_debug(config):
|
||||
if config.debug:
|
||||
config.num_steps = 2
|
||||
config.eval_period = 1
|
||||
config.log_period = 1
|
||||
config.save_period = 1
|
||||
config.val_num_batches = 2
|
||||
config.test_num_batches = 2
|
||||
|
||||
|
||||
def _train(config):
|
||||
data_filter = get_squad_data_filter(config)
|
||||
train_data = read_data(config, 'train', config.load, data_filter=data_filter)
|
||||
dev_data = read_data(config, 'dev', True, data_filter=data_filter)
|
||||
update_config(config, [train_data, dev_data])
|
||||
|
||||
_config_debug(config)
|
||||
|
||||
word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
|
||||
word2idx_dict = train_data.shared['word2idx']
|
||||
idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
|
||||
emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
|
||||
else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
|
||||
for idx in range(config.word_vocab_size)])
|
||||
config.emb_mat = emb_mat
|
||||
|
||||
# construct model graph and variables (using default graph)
|
||||
pprint(config.__flags, indent=2)
|
||||
models = get_multi_gpu_models(config)
|
||||
model = models[0]
|
||||
print("num params: {}".format(get_num_params()))
|
||||
trainer = MultiGPUTrainer(config, models)
|
||||
evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
|
||||
graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading/saving
|
||||
|
||||
# Variables
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
# Begin training
|
||||
num_steps = config.num_steps or int(math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))) * config.num_epochs
|
||||
global_step = 0
|
||||
for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
|
||||
num_steps=num_steps, shuffle=True, cluster=config.cluster), total=num_steps):
|
||||
global_step = sess.run(model.global_step) + 1 # +1 because all calculations are done after step
|
||||
get_summary = global_step % config.log_period == 0
|
||||
loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
|
||||
if get_summary:
|
||||
graph_handler.add_summary(summary, global_step)
|
||||
|
||||
# occasional saving
|
||||
if global_step % config.save_period == 0:
|
||||
graph_handler.save(sess, global_step=global_step)
|
||||
|
||||
if not config.eval:
|
||||
continue
|
||||
# Occasional evaluation
|
||||
if global_step % config.eval_period == 0:
|
||||
num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
|
||||
if 0 < config.val_num_batches < num_steps:
|
||||
num_steps = config.val_num_batches
|
||||
e_train = evaluator.get_evaluation_from_batches(
|
||||
sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps)
|
||||
)
|
||||
graph_handler.add_summaries(e_train.summaries, global_step)
|
||||
e_dev = evaluator.get_evaluation_from_batches(
|
||||
sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
|
||||
graph_handler.add_summaries(e_dev.summaries, global_step)
|
||||
|
||||
if config.dump_eval:
|
||||
graph_handler.dump_eval(e_dev)
|
||||
if config.dump_answer:
|
||||
graph_handler.dump_answer(e_dev)
|
||||
if global_step % config.save_period != 0:
|
||||
graph_handler.save(sess, global_step=global_step)
|
||||
|
||||
|
||||
def _test(config):
|
||||
test_data = read_data(config, 'test', True)
|
||||
update_config(config, [test_data])
|
||||
|
||||
_config_debug(config)
|
||||
|
||||
if config.use_glove_for_unk:
|
||||
word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
|
||||
new_word2idx_dict = test_data.shared['new_word2idx']
|
||||
idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
|
||||
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
|
||||
config.new_emb_mat = new_emb_mat
|
||||
|
||||
pprint(config.__flags, indent=2)
|
||||
models = get_multi_gpu_models(config)
|
||||
model = models[0]
|
||||
evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=models[0].tensor_dict if config.vis else None)
|
||||
graph_handler = GraphHandler(config, model)
|
||||
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
graph_handler.initialize(sess)
|
||||
num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
|
||||
if 0 < config.test_num_batches < num_steps:
|
||||
num_steps = config.test_num_batches
|
||||
|
||||
e = None
|
||||
for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps, cluster=config.cluster), total=num_steps):
|
||||
ei = evaluator.get_evaluation(sess, multi_batch)
|
||||
e = ei if e is None else e + ei
|
||||
if config.vis:
|
||||
eval_subdir = os.path.join(config.eval_dir, "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
|
||||
if not os.path.exists(eval_subdir):
|
||||
os.mkdir(eval_subdir)
|
||||
path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
|
||||
graph_handler.dump_eval(ei, path=path)
|
||||
|
||||
print(e)
|
||||
if config.dump_answer:
|
||||
print("dumping answer ...")
|
||||
graph_handler.dump_answer(e)
|
||||
if config.dump_eval:
|
||||
print("dumping eval ...")
|
||||
graph_handler.dump_eval(e)
|
||||
|
||||
|
||||
def _forward(config):
|
||||
assert config.load
|
||||
test_data = read_data(config, config.forward_name, True)
|
||||
update_config(config, [test_data])
|
||||
|
||||
_config_debug(config)
|
||||
|
||||
if config.use_glove_for_unk:
|
||||
word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
|
||||
new_word2idx_dict = test_data.shared['new_word2idx']
|
||||
idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
|
||||
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
|
||||
config.new_emb_mat = new_emb_mat
|
||||
|
||||
pprint(config.__flags, indent=2)
|
||||
models = get_multi_gpu_models(config)
|
||||
model = models[0]
|
||||
print("num params: {}".format(get_num_params()))
|
||||
evaluator = ForwardEvaluator(config, model)
|
||||
graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading/saving
|
||||
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
num_batches = math.ceil(test_data.num_examples / config.batch_size)
|
||||
if 0 < config.test_num_batches < num_batches:
|
||||
num_batches = config.test_num_batches
|
||||
e = evaluator.get_evaluation_from_batches(sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
|
||||
print(e)
|
||||
if config.dump_answer:
|
||||
print("dumping answer ...")
|
||||
graph_handler.dump_answer(e, path=config.answer_path)
|
||||
if config.dump_eval:
|
||||
print("dumping eval ...")
|
||||
graph_handler.dump_eval(e, path=config.eval_path)
|
||||
|
||||
|
||||
def _get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("config_path")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
class Config(object):
|
||||
def __init__(self, **entries):
|
||||
self.__dict__.update(entries)
|
||||
|
||||
|
||||
def _run():
|
||||
args = _get_args()
|
||||
with open(args.config_path, 'r') as fh:
|
||||
config = Config(**json.load(fh))
|
||||
main(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_run()
|
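The Config class above simply promotes the JSON keys into attributes, and main() then dispatches on config.mode. A minimal sketch of the mechanism (the real config produced by basic/cli.py carries many more fields than shown here):
cfg = Config(mode='forward', load=True, device='/cpu:0')
assert cfg.mode == 'forward' and cfg.load   # plain attribute access, no schema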
535
tensorflow/SQuAD/basic/model.py
Normal file
|
@ -0,0 +1,535 @@
|
|||
import random
|
||||
|
||||
import itertools
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.rnn import BasicLSTMCell
|
||||
|
||||
from basic.read_data import DataSet
|
||||
from my.tensorflow import get_initializer
|
||||
from my.tensorflow.nn import softsel, get_logits, highway_network, multi_conv1d
|
||||
from my.tensorflow.rnn import bidirectional_dynamic_rnn
|
||||
from my.tensorflow.rnn_cell import SwitchableDropoutWrapper, AttentionCell
|
||||
|
||||
|
||||
def get_multi_gpu_models(config):
|
||||
models = []
|
||||
with tf.variable_scope(tf.get_variable_scope()):
|
||||
for gpu_idx in range(config.num_gpus):
|
||||
with tf.name_scope("model_{}".format(gpu_idx)) as scope, tf.device("/{}:{}".format(config.device_type, gpu_idx)):
|
||||
if gpu_idx > 0:
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
model = Model(config, scope, rep=gpu_idx == 0)
|
||||
models.append(model)
|
||||
|
||||
# update the summary in a different scope to avoid reuse issue
|
||||
with tf.variable_scope('loss_summary', reuse=False):
|
||||
for gpu_idx in range(config.num_gpus):
|
||||
with tf.name_scope("model_{}".format(gpu_idx)) as scope, tf.device("/{}:{}".format(config.device_type, gpu_idx)):
|
||||
model = models[gpu_idx]
|
||||
rep = gpu_idx == 0
|
||||
if rep:
|
||||
model._build_var_ema()
|
||||
if config.mode == 'train':
|
||||
model._build_ema()
|
||||
model.summary = tf.summary.merge_all()
|
||||
model.summary = tf.summary.merge(tf.get_collection("summaries", scope=model.scope))
|
||||
|
||||
return models
|
||||
|
||||
|
||||
class Model(object):
|
||||
def __init__(self, config, scope, rep=True):
|
||||
self.scope = scope
|
||||
self.config = config
|
||||
self.global_step = tf.get_variable('global_step', shape=[], dtype='int32',
|
||||
initializer=tf.constant_initializer(0), trainable=False)
|
||||
|
||||
# Define forward inputs here
|
||||
N, M, JX, JQ, VW, VC, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.max_word_size
|
||||
self.x = tf.placeholder('int32', [N, None, None], name='x')
|
||||
self.cx = tf.placeholder('int32', [N, None, None, W], name='cx')
|
||||
self.x_mask = tf.placeholder('bool', [N, None, None], name='x_mask')
|
||||
self.q = tf.placeholder('int32', [N, None], name='q')
|
||||
self.cq = tf.placeholder('int32', [N, None, W], name='cq')
|
||||
self.q_mask = tf.placeholder('bool', [N, None], name='q_mask')
|
||||
self.y = tf.placeholder('bool', [N, None, None], name='y')
|
||||
self.y2 = tf.placeholder('bool', [N, None, None], name='y2')
|
||||
self.wy = tf.placeholder('bool', [N, None, None], name='wy')
|
||||
self.is_train = tf.placeholder('bool', [], name='is_train')
|
||||
self.new_emb_mat = tf.placeholder('float', [None, config.word_emb_size], name='new_emb_mat')
|
||||
self.na = tf.placeholder('bool', [N], name='na')
|
||||
|
||||
# Define misc
|
||||
self.tensor_dict = {}
|
||||
|
||||
# Forward outputs / loss inputs
|
||||
self.logits = None
|
||||
self.yp = None
|
||||
self.var_list = None
|
||||
self.na_prob = None
|
||||
|
||||
# Loss outputs
|
||||
self.loss = None
|
||||
|
||||
self._build_forward()
|
||||
self._build_loss()
|
||||
self.var_ema = None
|
||||
# if rep:
|
||||
# self._build_var_ema()
|
||||
# if config.mode == 'train':
|
||||
# self._build_ema()
|
||||
|
||||
# self.summary = tf.summary.merge_all()
|
||||
# self.summary = tf.summary.merge(tf.get_collection("summaries", scope=self.scope))
|
||||
|
||||
def _build_forward(self):
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC, d, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
|
||||
config.max_word_size
|
||||
JX = tf.shape(self.x)[2]
|
||||
JQ = tf.shape(self.q)[1]
|
||||
M = tf.shape(self.x)[1]
|
||||
dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size
|
||||
|
||||
with tf.variable_scope("emb"):
|
||||
if config.use_char_emb:
|
||||
with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
|
||||
char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')
|
||||
|
||||
with tf.variable_scope("char"):
|
||||
Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx) # [N, M, JX, W, dc]
|
||||
Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq) # [N, JQ, W, dc]
|
||||
Acx = tf.reshape(Acx, [-1, JX, W, dc])
|
||||
Acq = tf.reshape(Acq, [-1, JQ, W, dc])
|
||||
|
||||
filter_sizes = list(map(int, config.out_channel_dims.split(',')))
|
||||
heights = list(map(int, config.filter_heights.split(',')))
|
||||
assert sum(filter_sizes) == dco, (filter_sizes, dco)
|
||||
with tf.variable_scope("conv"):
|
||||
xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
|
||||
if config.share_cnn_weights:
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
|
||||
else:
|
||||
qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
|
||||
xx = tf.reshape(xx, [-1, M, JX, dco])
|
||||
qq = tf.reshape(qq, [-1, JQ, dco])
|
||||
|
||||
if config.use_word_emb:
|
||||
with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
|
||||
if config.mode == 'train':
|
||||
word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
|
||||
else:
|
||||
word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
|
||||
if config.use_glove_for_unk:
|
||||
word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])
|
||||
|
||||
with tf.name_scope("word"):
|
||||
Ax = tf.nn.embedding_lookup(word_emb_mat, self.x) # [N, M, JX, d]
|
||||
Aq = tf.nn.embedding_lookup(word_emb_mat, self.q) # [N, JQ, d]
|
||||
self.tensor_dict['x'] = Ax
|
||||
self.tensor_dict['q'] = Aq
|
||||
if config.use_char_emb:
|
||||
xx = tf.concat(axis=3, values=[xx, Ax]) # [N, M, JX, di]
|
||||
qq = tf.concat(axis=2, values=[qq, Aq]) # [N, JQ, di]
|
||||
else:
|
||||
xx = Ax
|
||||
qq = Aq
|
||||
|
||||
# highway network
|
||||
if config.highway:
|
||||
with tf.variable_scope("highway"):
|
||||
xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
|
||||
|
||||
self.tensor_dict['xx'] = xx
|
||||
self.tensor_dict['qq'] = qq
|
||||
|
||||
cell_fw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
cell_bw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
d_cell_fw = SwitchableDropoutWrapper(cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
d_cell_bw = SwitchableDropoutWrapper(cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
d_cell2_fw = SwitchableDropoutWrapper(cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
d_cell2_bw = SwitchableDropoutWrapper(cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
d_cell3_fw = SwitchableDropoutWrapper(cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
d_cell3_bw = SwitchableDropoutWrapper(cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
|
||||
d_cell4_fw = SwitchableDropoutWrapper(cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
d_cell4_bw = SwitchableDropoutWrapper(cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2) # [N, M]
|
||||
q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1) # [N]
|
||||
|
||||
with tf.variable_scope("prepro"):
|
||||
(fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell_fw, d_cell_bw, qq, q_len, dtype='float', scope='u1') # [N, J, d], [N, d]
|
||||
u = tf.concat(axis=2, values=[fw_u, bw_u])
|
||||
if config.share_lstm_weights:
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='u1') # [N, M, JX, 2d]
|
||||
h = tf.concat(axis=3, values=[fw_h, bw_h]) # [N, M, JX, 2d]
|
||||
else:
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='h1') # [N, M, JX, 2d]
|
||||
h = tf.concat(axis=3, values=[fw_h, bw_h]) # [N, M, JX, 2d]
|
||||
self.tensor_dict['u'] = u
|
||||
self.tensor_dict['h'] = h
|
||||
|
||||
with tf.variable_scope("main"):
|
||||
if config.dynamic_att:
|
||||
p0 = h
|
||||
u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
|
||||
q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
|
||||
first_cell_fw = AttentionCell(cell2_fw, u, mask=q_mask, mapper='sim',
|
||||
input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
|
||||
first_cell_bw = AttentionCell(cell2_bw, u, mask=q_mask, mapper='sim',
|
||||
input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
|
||||
second_cell_fw = AttentionCell(cell3_fw, u, mask=q_mask, mapper='sim',
|
||||
input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
|
||||
second_cell_bw = AttentionCell(cell3_bw, u, mask=q_mask, mapper='sim',
|
||||
input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
|
||||
else:
|
||||
p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
|
||||
first_cell_fw = d_cell2_fw
|
||||
second_cell_fw = d_cell3_fw
|
||||
first_cell_bw = d_cell2_bw
|
||||
second_cell_bw = d_cell3_bw
|
||||
|
||||
(fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell_fw, first_cell_bw, p0, x_len, dtype='float', scope='g0') # [N, M, JX, 2d]
|
||||
g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
|
||||
(fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(second_cell_fw, second_cell_bw, g0, x_len, dtype='float', scope='g1') # [N, M, JX, 2d]
|
||||
g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])
|
||||
|
||||
logits = get_logits([g1, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
|
||||
mask=self.x_mask, is_train=self.is_train, func=config.answer_func, scope='logits1')
|
||||
a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]), tf.reshape(logits, [N, M * JX]))
|
||||
a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), [1, M, JX, 1])
|
||||
|
||||
(fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell4_fw, d_cell4_bw, tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
|
||||
x_len, dtype='float', scope='g2') # [N, M, JX, 2d]
|
||||
g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
|
||||
logits2 = get_logits([g2, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
|
||||
mask=self.x_mask,
|
||||
is_train=self.is_train, func=config.answer_func, scope='logits2')
|
||||
|
||||
flat_logits = tf.reshape(logits, [-1, M * JX])
|
||||
flat_yp = tf.nn.softmax(flat_logits) # [-1, M*JX]
|
||||
flat_logits2 = tf.reshape(logits2, [-1, M * JX])
|
||||
flat_yp2 = tf.nn.softmax(flat_logits2)
|
||||
|
||||
if config.na:
|
||||
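# "No answer" is modeled as one extra logit prepended to the flattened span logits; its softmax mass becomes na_prob (and na_prob2 for the end pointer).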
na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
|
||||
na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]), [N, 1]) # [N, 1]
|
||||
concat_flat_logits = tf.concat(axis=1, values=[na_bias_tiled, flat_logits])
|
||||
concat_flat_yp = tf.nn.softmax(concat_flat_logits)
|
||||
na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]), [1])
|
||||
flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])
|
||||
|
||||
concat_flat_logits2 = tf.concat(axis=1, values=[na_bias_tiled, flat_logits2])
|
||||
concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
|
||||
na_prob2 = tf.squeeze(tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1]) # [N]
|
||||
flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])
|
||||
|
||||
self.concat_logits = concat_flat_logits
|
||||
self.concat_logits2 = concat_flat_logits2
|
||||
self.na_prob = na_prob * na_prob2
|
||||
|
||||
yp = tf.reshape(flat_yp, [-1, M, JX])
|
||||
yp2 = tf.reshape(flat_yp2, [-1, M, JX])
|
||||
wyp = tf.nn.sigmoid(logits2)
|
||||
|
||||
self.tensor_dict['g1'] = g1
|
||||
self.tensor_dict['g2'] = g2
|
||||
|
||||
self.logits = flat_logits
|
||||
self.logits2 = flat_logits2
|
||||
self.yp = yp
|
||||
self.yp2 = yp2
|
||||
self.wyp = wyp
|
||||
|
||||
def _build_loss(self):
|
||||
config = self.config
|
||||
JX = tf.shape(self.x)[2]
|
||||
M = tf.shape(self.x)[1]
|
||||
JQ = tf.shape(self.q)[1]
|
||||
|
||||
loss_mask = tf.reduce_max(tf.cast(self.q_mask, 'float'), 1)
|
||||
if config.wy:
|
||||
losses = tf.nn.sigmoid_cross_entropy_with_logits(
|
||||
logits=tf.reshape(self.logits2, [-1, M, JX]), labels=tf.cast(self.wy, 'float')) # [N, M, JX]
|
||||
num_pos = tf.reduce_sum(tf.cast(self.wy, 'float'))
|
||||
num_neg = tf.reduce_sum(tf.cast(self.x_mask, 'float')) - num_pos
|
||||
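# Down-weight the far more numerous non-answer positions so that positive and negative tokens contribute comparably to the sigmoid loss.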
damp_ratio = num_pos / num_neg
|
||||
dampened_losses = losses * (
|
||||
(tf.cast(self.x_mask, 'float') - tf.cast(self.wy, 'float')) * damp_ratio + tf.cast(self.wy, 'float'))
|
||||
new_losses = tf.reduce_sum(dampened_losses, [1, 2])
|
||||
ce_loss = tf.reduce_mean(loss_mask * new_losses)
|
||||
"""
|
||||
if config.na:
|
||||
na = tf.reshape(self.na, [-1, 1])
|
||||
concat_y = tf.concat(1, [na, tf.reshape(self.wy, [-1, M * JX])])
|
||||
losses = tf.nn.softmax_cross_entropy_with_logits(
|
||||
self.concat_logits, tf.cast(concat_y, 'float') / tf.reduce_sum(tf.cast(self.wy, 'float')))
|
||||
else:
|
||||
losses = tf.nn.softmax_cross_entropy_with_logits(
|
||||
self.logits2, tf.cast(tf.reshape(self.wy, [-1, M * JX]), 'float') / tf.reduce_sum(tf.cast(self.wy, 'float')))
|
||||
ce_loss = tf.reduce_mean(loss_mask * losses)
|
||||
"""
|
||||
tf.add_to_collection('losses', ce_loss)
|
||||
|
||||
else:
|
||||
if config.na:
|
||||
na = tf.reshape(self.na, [-1, 1])
|
||||
concat_y = tf.concat(axis=1, values=[na, tf.reshape(self.y, [-1, M * JX])])
|
||||
losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.concat_logits, labels=tf.cast(concat_y, 'float'))
|
||||
concat_y2 = tf.concat(axis=1, values=[na, tf.reshape(self.y2, [-1, M * JX])])
|
||||
losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=self.concat_logits2, labels=tf.cast(concat_y2, 'float'))
|
||||
else:
|
||||
losses = tf.nn.softmax_cross_entropy_with_logits(
|
||||
logits=self.logits, labels=tf.cast(tf.reshape(self.y, [-1, M * JX]), 'float'))
|
||||
losses2 = tf.nn.softmax_cross_entropy_with_logits(
|
||||
logits=self.logits2, labels=tf.cast(tf.reshape(self.y2, [-1, M * JX]), 'float'))
|
||||
ce_loss = tf.reduce_mean(loss_mask * losses)
|
||||
ce_loss2 = tf.reduce_mean(loss_mask * losses2)
|
||||
tf.add_to_collection('losses', ce_loss)
|
||||
tf.add_to_collection("losses", ce_loss2)
|
||||
|
||||
self.loss = tf.add_n(tf.get_collection('losses', scope=self.scope), name='loss')
|
||||
tf.summary.scalar(self.loss.op.name, self.loss)
|
||||
tf.add_to_collection('ema/scalar', self.loss)
|
||||
|
||||
def _build_ema(self):
|
||||
self.ema = tf.train.ExponentialMovingAverage(self.config.decay)
|
||||
ema = self.ema
|
||||
tensors = tf.get_collection("ema/scalar", scope=self.scope) + tf.get_collection("ema/vector", scope=self.scope)
|
||||
ema_op = ema.apply(tensors)
|
||||
for var in tf.get_collection("ema/scalar", scope=self.scope):
|
||||
ema_var = ema.average(var)
|
||||
tf.summary.scalar(ema_var.op.name, ema_var)
|
||||
for var in tf.get_collection("ema/vector", scope=self.scope):
|
||||
ema_var = ema.average(var)
|
||||
tf.summary.histogram(ema_var.op.name, ema_var)
|
||||
|
||||
with tf.control_dependencies([ema_op]):
|
||||
self.loss = tf.identity(self.loss)
|
||||
|
||||
def _build_var_ema(self):
|
||||
self.var_ema = tf.train.ExponentialMovingAverage(self.config.var_decay)
|
||||
ema = self.var_ema
|
||||
ema_op = ema.apply(tf.trainable_variables())
|
||||
with tf.control_dependencies([ema_op]):
|
||||
self.loss = tf.identity(self.loss)
|
||||
|
||||
def get_loss(self):
|
||||
return self.loss
|
||||
|
||||
def get_global_step(self):
|
||||
return self.global_step
|
||||
|
||||
def get_var_list(self):
|
||||
return self.var_list
|
||||
|
||||
def get_feed_dict(self, batch, is_train, supervised=True):
|
||||
assert isinstance(batch, DataSet)
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC, d, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, config.max_word_size
|
||||
feed_dict = {}
|
||||
|
||||
if config.len_opt:
|
||||
"""
|
||||
Note that this optimization results in variable GPU RAM usage (i.e., it can cause OOM in the middle of training).
|
||||
First test without len_opt to make sure there is no OOM, and then enable len_opt.
|
||||
"""
|
||||
if sum(len(sent) for para in batch.data['x'] for sent in para) == 0:
|
||||
new_JX = 1
|
||||
else:
|
||||
new_JX = max(len(sent) for para in batch.data['x'] for sent in para)
|
||||
JX = min(JX, new_JX)
|
||||
|
||||
if sum(len(ques) for ques in batch.data['q']) == 0:
|
||||
new_JQ = 1
|
||||
else:
|
||||
new_JQ = max(len(ques) for ques in batch.data['q'])
|
||||
JQ = min(JQ, new_JQ)
|
||||
|
||||
if config.cpu_opt:
|
||||
if sum(len(para) for para in batch.data['x']) == 0:
|
||||
new_M = 1
|
||||
else:
|
||||
new_M = max(len(para) for para in batch.data['x'])
|
||||
M = min(M, new_M)
|
||||
|
||||
x = np.zeros([N, M, JX], dtype='int32')
|
||||
cx = np.zeros([N, M, JX, W], dtype='int32')
|
||||
x_mask = np.zeros([N, M, JX], dtype='bool')
|
||||
q = np.zeros([N, JQ], dtype='int32')
|
||||
cq = np.zeros([N, JQ, W], dtype='int32')
|
||||
q_mask = np.zeros([N, JQ], dtype='bool')
|
||||
|
||||
feed_dict[self.x] = x
|
||||
feed_dict[self.x_mask] = x_mask
|
||||
feed_dict[self.cx] = cx
|
||||
feed_dict[self.q] = q
|
||||
feed_dict[self.cq] = cq
|
||||
feed_dict[self.q_mask] = q_mask
|
||||
feed_dict[self.is_train] = is_train
|
||||
if config.use_glove_for_unk:
|
||||
feed_dict[self.new_emb_mat] = batch.shared['new_emb_mat']
|
||||
|
||||
X = batch.data['x']
|
||||
CX = batch.data['cx']
|
||||
|
||||
if supervised:
|
||||
y = np.zeros([N, M, JX], dtype='bool')
|
||||
y2 = np.zeros([N, M, JX], dtype='bool')
|
||||
wy = np.zeros([N, M, JX], dtype='bool')
|
||||
na = np.zeros([N], dtype='bool')
|
||||
feed_dict[self.y] = y
|
||||
feed_dict[self.y2] = y2
|
||||
feed_dict[self.wy] = wy
|
||||
feed_dict[self.na] = na
|
||||
|
||||
for i, (xi, cxi, yi, nai) in enumerate(zip(X, CX, batch.data['y'], batch.data['na'])):
|
||||
if nai:
|
||||
na[i] = nai
|
||||
continue
|
||||
start_idx, stop_idx = random.choice(yi)
|
||||
j, k = start_idx
|
||||
j2, k2 = stop_idx
|
||||
if config.single:
|
||||
X[i] = [xi[j]]
|
||||
CX[i] = [cxi[j]]
|
||||
j, j2 = 0, 0
|
||||
if config.squash:
|
||||
offset = sum(map(len, xi[:j]))
|
||||
j, k = 0, k + offset
|
||||
offset = sum(map(len, xi[:j2]))
|
||||
j2, k2 = 0, k2 + offset
|
||||
y[i, j, k] = True
|
||||
y2[i, j2, k2-1] = True
|
||||
if j == j2:
|
||||
wy[i, j, k:k2] = True
|
||||
else:
|
||||
wy[i, j, k:len(batch.data['x'][i][j])] = True
|
||||
wy[i, j2, :k2] = True
|
||||
|
||||
def _get_word(word):
|
||||
d = batch.shared['word2idx']
|
||||
for each in (word, word.lower(), word.capitalize(), word.upper()):
|
||||
if each in d:
|
||||
return d[each]
|
||||
if config.use_glove_for_unk:
|
||||
d2 = batch.shared['new_word2idx']
|
||||
for each in (word, word.lower(), word.capitalize(), word.upper()):
|
||||
if each in d2:
|
||||
return d2[each] + len(d)
|
||||
return 1
|
||||
|
||||
def _get_char(char):
|
||||
d = batch.shared['char2idx']
|
||||
if char in d:
|
||||
return d[char]
|
||||
return 1
|
||||
|
||||
for i, xi in enumerate(X):
|
||||
if self.config.squash:
|
||||
xi = [list(itertools.chain(*xi))]
|
||||
for j, xij in enumerate(xi):
|
||||
if j == config.max_num_sents:
|
||||
break
|
||||
for k, xijk in enumerate(xij):
|
||||
if k == config.max_sent_size:
|
||||
break
|
||||
each = _get_word(xijk)
|
||||
assert isinstance(each, int), each
|
||||
x[i, j, k] = each
|
||||
x_mask[i, j, k] = True
|
||||
|
||||
for i, cxi in enumerate(CX):
|
||||
if self.config.squash:
|
||||
cxi = [list(itertools.chain(*cxi))]
|
||||
for j, cxij in enumerate(cxi):
|
||||
if j == config.max_num_sents:
|
||||
break
|
||||
for k, cxijk in enumerate(cxij):
|
||||
if k == config.max_sent_size:
|
||||
break
|
||||
for l, cxijkl in enumerate(cxijk):
|
||||
if l == config.max_word_size:
|
||||
break
|
||||
cx[i, j, k, l] = _get_char(cxijkl)
|
||||
|
||||
for i, qi in enumerate(batch.data['q']):
|
||||
for j, qij in enumerate(qi):
|
||||
q[i, j] = _get_word(qij)
|
||||
q_mask[i, j] = True
|
||||
|
||||
for i, cqi in enumerate(batch.data['cq']):
|
||||
for j, cqij in enumerate(cqi):
|
||||
for k, cqijk in enumerate(cqij):
|
||||
cq[i, j, k] = _get_char(cqijk)
|
||||
if k + 1 == config.max_word_size:
|
||||
break
|
||||
|
||||
if supervised:
|
||||
assert np.sum(~(x_mask | ~wy)) == 0
|
||||
|
||||
return feed_dict
|
||||
|
||||
|
||||
def bi_attention(config, is_train, h, u, h_mask=None, u_mask=None, scope=None, tensor_dict=None):
|
||||
with tf.variable_scope(scope or "bi_attention"):
|
||||
JX = tf.shape(h)[2]
|
||||
M = tf.shape(h)[1]
|
||||
JQ = tf.shape(u)[1]
|
||||
h_aug = tf.tile(tf.expand_dims(h, 3), [1, 1, 1, JQ, 1])
|
||||
u_aug = tf.tile(tf.expand_dims(tf.expand_dims(u, 1), 1), [1, M, JX, 1, 1])
|
||||
if h_mask is None:
|
||||
hu_mask = None
|
||||
else:
|
||||
h_mask_aug = tf.tile(tf.expand_dims(h_mask, 3), [1, 1, 1, JQ])
|
||||
u_mask_aug = tf.tile(tf.expand_dims(tf.expand_dims(u_mask, 1), 1), [1, M, JX, 1])
|
||||
hu_mask = h_mask_aug & u_mask_aug
|
||||
|
||||
u_logits = get_logits([h_aug, u_aug], None, True, wd=config.wd, mask=hu_mask,
|
||||
is_train=is_train, func=config.logit_func, scope='u_logits') # [N, M, JX, JQ]
|
||||
u_a = softsel(u_aug, u_logits) # [N, M, JX, d]
|
||||
h_a = softsel(h, tf.reduce_max(u_logits, 3)) # [N, M, d]
|
||||
h_a = tf.tile(tf.expand_dims(h_a, 2), [1, 1, JX, 1])
|
||||
|
||||
if tensor_dict is not None:
|
||||
a_u = tf.nn.softmax(u_logits) # [N, M, JX, JQ]
|
||||
a_h = tf.nn.softmax(tf.reduce_max(u_logits, 3))
|
||||
tensor_dict['a_u'] = a_u
|
||||
tensor_dict['a_h'] = a_h
|
||||
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name)
|
||||
for var in variables:
|
||||
tensor_dict[var.name] = var
|
||||
|
||||
return u_a, h_a
|
||||
|
||||
|
||||
def attention_layer(config, is_train, h, u, h_mask=None, u_mask=None, scope=None, tensor_dict=None):
|
||||
with tf.variable_scope(scope or "attention_layer"):
|
||||
JX = tf.shape(h)[2]
|
||||
M = tf.shape(h)[1]
|
||||
JQ = tf.shape(u)[1]
|
||||
if config.q2c_att or config.c2q_att:
|
||||
u_a, h_a = bi_attention(config, is_train, h, u, h_mask=h_mask, u_mask=u_mask, tensor_dict=tensor_dict)
|
||||
if not config.c2q_att:
|
||||
u_a = tf.tile(tf.expand_dims(tf.expand_dims(tf.reduce_mean(u, 1), 1), 1), [1, M, JX, 1])
|
||||
if config.q2c_att:
|
||||
p0 = tf.concat(axis=3, values=[h, u_a, h * u_a, h * h_a])
|
||||
else:
|
||||
p0 = tf.concat(axis=3, values=[h, u_a, h * u_a])
|
||||
return p0
|
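The attention code above leans on softsel from my/tensorflow/nn.py, which is not shown in this hunk; it is assumed here to be a softmax-weighted sum over the second-to-last axis of its target. A small numpy illustration of that semantics:
import numpy as np

def softsel_np(target, logits):
    # target: [..., J, d], logits: [..., J]
    a = np.exp(logits - logits.max(axis=-1, keepdims=True))
    a /= a.sum(axis=-1, keepdims=True)
    return (a[..., None] * target).sum(axis=-2)

u = np.random.randn(2, 3, 4)        # e.g. question vectors [N, JQ, 2d]
scores = np.random.randn(2, 3)      # logits over the JQ axis
print(softsel_np(u, scores).shape)  # (2, 4): one attended vector per example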
316
tensorflow/SQuAD/basic/read_data.py
Normal file
|
@ -0,0 +1,316 @@
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import itertools
|
||||
import math
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
|
||||
from my.tensorflow import grouper
|
||||
from my.utils import index
|
||||
|
||||
|
||||
class Data(object):
|
||||
def get_size(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_by_idxs(self, idxs):
|
||||
"""
|
||||
Efficient way to obtain a batch of items from filesystem
|
||||
:param idxs:
|
||||
:return: dict mapping each key to a list of values, e.g. {'X': [...], 'Y': [...]}
|
||||
"""
|
||||
data = defaultdict(list)
|
||||
for idx in idxs:
|
||||
each_data = self.get_one(idx)
|
||||
for key, val in each_data.items():
|
||||
data[key].append(val)
|
||||
return data
|
||||
|
||||
def get_one(self, idx):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_empty(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def __add__(self, other):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class DataSet(object):
|
||||
def __init__(self, data, data_type, shared=None, valid_idxs=None):
|
||||
self.data = data # e.g. {'X': [0, 1, 2], 'Y': [2, 3, 4]}
|
||||
self.data_type = data_type
|
||||
self.shared = shared
|
||||
total_num_examples = self.get_data_size()
|
||||
self.valid_idxs = range(total_num_examples) if valid_idxs is None else valid_idxs
|
||||
self.num_examples = len(self.valid_idxs)
|
||||
|
||||
def _sort_key(self, idx):
|
||||
rx = self.data['*x'][idx]
|
||||
x = self.shared['x'][rx[0]][rx[1]]
|
||||
return max(map(len, x))
|
||||
|
||||
def get_data_size(self):
|
||||
if isinstance(self.data, dict):
|
||||
return len(next(iter(self.data.values())))
|
||||
elif isinstance(self.data, Data):
|
||||
return self.data.get_size()
|
||||
raise Exception()
|
||||
|
||||
def get_by_idxs(self, idxs):
|
||||
if isinstance(self.data, dict):
|
||||
out = defaultdict(list)
|
||||
for key, val in self.data.items():
|
||||
out[key].extend(val[idx] for idx in idxs)
|
||||
return out
|
||||
elif isinstance(self.data, Data):
|
||||
return self.data.get_by_idxs(idxs)
|
||||
raise Exception()
|
||||
|
||||
def get_batches(self, batch_size, num_batches=None, shuffle=False, cluster=False):
|
||||
"""
|
||||
|
||||
:param batch_size:
|
||||
:param num_batches:
|
||||
:param shuffle:
|
||||
:param cluster: cluster examples by their lengths; this may give a performance boost (i.e. faster training).
|
||||
:return:
|
||||
"""
|
||||
num_batches_per_epoch = int(math.ceil(self.num_examples / batch_size))
|
||||
if num_batches is None:
|
||||
num_batches = num_batches_per_epoch
|
||||
num_epochs = int(math.ceil(num_batches / num_batches_per_epoch))
|
||||
|
||||
if shuffle:
|
||||
random_idxs = random.sample(self.valid_idxs, len(self.valid_idxs))
|
||||
if cluster:
|
||||
sorted_idxs = sorted(random_idxs, key=self._sort_key)
|
||||
sorted_grouped = lambda: list(grouper(sorted_idxs, batch_size))
|
||||
grouped = lambda: random.sample(sorted_grouped(), num_batches_per_epoch)
|
||||
else:
|
||||
random_grouped = lambda: list(grouper(random_idxs, batch_size))
|
||||
grouped = random_grouped
|
||||
else:
|
||||
raw_grouped = lambda: list(grouper(self.valid_idxs, batch_size))
|
||||
grouped = raw_grouped
|
||||
|
||||
batch_idx_tuples = itertools.chain.from_iterable(grouped() for _ in range(num_epochs))
|
||||
for _ in range(num_batches):
|
||||
batch_idxs = tuple(i for i in next(batch_idx_tuples) if i is not None)
|
||||
batch_data = self.get_by_idxs(batch_idxs)
|
||||
shared_batch_data = {}
|
||||
for key, val in batch_data.items():
|
||||
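# Keys starting with '*' hold indices into the shared store; dereference them here into a parallel un-starred key for this batch.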
if key.startswith('*'):
|
||||
assert self.shared is not None
|
||||
shared_key = key[1:]
|
||||
shared_batch_data[shared_key] = [index(self.shared[shared_key], each) for each in val]
|
||||
batch_data.update(shared_batch_data)
|
||||
|
||||
batch_ds = DataSet(batch_data, self.data_type, shared=self.shared)
|
||||
yield batch_idxs, batch_ds
|
||||
|
||||
def get_multi_batches(self, batch_size, num_batches_per_step, num_steps=None, shuffle=False, cluster=False):
|
||||
batch_size_per_step = batch_size * num_batches_per_step
|
||||
batches = self.get_batches(batch_size_per_step, num_batches=num_steps, shuffle=shuffle, cluster=cluster)
|
||||
multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=num_batches_per_step),
|
||||
data_set.divide(num_batches_per_step))) for idxs, data_set in batches)
|
||||
return multi_batches
|
||||
|
||||
def get_empty(self):
|
||||
if isinstance(self.data, dict):
|
||||
data = {key: [] for key in self.data}
|
||||
elif isinstance(self.data, Data):
|
||||
data = self.data.get_empty()
|
||||
else:
|
||||
raise Exception()
|
||||
return DataSet(data, self.data_type, shared=self.shared)
|
||||
|
||||
def __add__(self, other):
|
||||
if isinstance(self.data, dict):
|
||||
data = {key: val + other.data[key] for key, val in self.data.items()}
|
||||
elif isinstance(self.data, Data):
|
||||
data = self.data + other.data
|
||||
else:
|
||||
raise Exception()
|
||||
|
||||
valid_idxs = list(self.valid_idxs) + [valid_idx + self.num_examples for valid_idx in other.valid_idxs]
|
||||
return DataSet(data, self.data_type, shared=self.shared, valid_idxs=valid_idxs)
|
||||
|
||||
def divide(self, integer):
|
||||
batch_size = int(math.ceil(self.num_examples / integer))
|
||||
idxs_gen = grouper(self.valid_idxs, batch_size, shorten=True, num_groups=integer)
|
||||
data_gen = (self.get_by_idxs(idxs) for idxs in idxs_gen)
|
||||
ds_tuple = tuple(DataSet(data, self.data_type, shared=self.shared) for data in data_gen)
|
||||
return ds_tuple
|
||||
|
||||
|
||||
def load_metadata(config, data_type):
|
||||
metadata_path = os.path.join(config.data_dir, "metadata_{}.json".format(data_type))
|
||||
with open(metadata_path, 'r') as fh:
|
||||
metadata = json.load(fh)
|
||||
for key, val in metadata.items():
|
||||
config.__setattr__(key, val)
|
||||
return metadata
|
||||
|
||||
|
||||
def read_data(config, data_type, ref, data_filter=None):
|
||||
data_path = os.path.join(config.data_dir, "data_{}.json".format(data_type))
|
||||
shared_path = os.path.join(config.data_dir, "shared_{}.json".format(data_type))
|
||||
with open(data_path, 'r') as fh:
|
||||
data = json.load(fh)
|
||||
with open(shared_path, 'r') as fh:
|
||||
shared = json.load(fh)
|
||||
|
||||
num_examples = len(next(iter(data.values())))
|
||||
if data_filter is None:
|
||||
valid_idxs = range(num_examples)
|
||||
else:
|
||||
mask = []
|
||||
keys = data.keys()
|
||||
values = data.values()
|
||||
for vals in zip(*values):
|
||||
each = {key: val for key, val in zip(keys, vals)}
|
||||
mask.append(data_filter(each, shared))
|
||||
valid_idxs = [idx for idx in range(len(mask)) if mask[idx]]
|
||||
|
||||
print("Loaded {}/{} examples from {}".format(len(valid_idxs), num_examples, data_type))
|
||||
|
||||
shared_path = config.shared_path or os.path.join(config.out_dir, "shared.json")
|
||||
if not ref:
|
||||
word2vec_dict = shared['lower_word2vec'] if config.lower_word else shared['word2vec']
|
||||
word_counter = shared['lower_word_counter'] if config.lower_word else shared['word_counter']
|
||||
char_counter = shared['char_counter']
|
||||
if config.finetune:
|
||||
shared['word2idx'] = {word: idx + 2 for idx, word in
|
||||
enumerate(word for word, count in word_counter.items()
|
||||
if count > config.word_count_th or (config.known_if_glove and word in word2vec_dict))}
|
||||
else:
|
||||
assert config.known_if_glove
|
||||
assert config.use_glove_for_unk
|
||||
shared['word2idx'] = {word: idx + 2 for idx, word in
|
||||
enumerate(word for word, count in word_counter.items()
|
||||
if count > config.word_count_th and word not in word2vec_dict)}
|
||||
shared['char2idx'] = {char: idx + 2 for idx, char in
|
||||
enumerate(char for char, count in char_counter.items()
|
||||
if count > config.char_count_th)}
|
||||
NULL = "-NULL-"
|
||||
UNK = "-UNK-"
|
||||
shared['word2idx'][NULL] = 0
|
||||
shared['word2idx'][UNK] = 1
|
||||
shared['char2idx'][NULL] = 0
|
||||
shared['char2idx'][UNK] = 1
|
||||
json.dump({'word2idx': shared['word2idx'], 'char2idx': shared['char2idx']}, open(shared_path, 'w'))
|
||||
else:
|
||||
new_shared = json.load(open(shared_path, 'r'))
|
||||
for key, val in new_shared.items():
|
||||
shared[key] = val
|
||||
|
||||
if config.use_glove_for_unk:
|
||||
# create new word2idx and word2vec
|
||||
word2vec_dict = shared['lower_word2vec'] if config.lower_word else shared['word2vec']
|
||||
new_word2idx_dict = {word: idx for idx, word in enumerate(word for word in word2vec_dict.keys() if word not in shared['word2idx'])}
|
||||
shared['new_word2idx'] = new_word2idx_dict
|
||||
offset = len(shared['word2idx'])
|
||||
word2vec_dict = shared['lower_word2vec'] if config.lower_word else shared['word2vec']
|
||||
new_word2idx_dict = shared['new_word2idx']
|
||||
idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
|
||||
# print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
|
||||
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
|
||||
shared['new_emb_mat'] = new_emb_mat
|
||||
|
||||
data_set = DataSet(data, data_type, shared=shared, valid_idxs=valid_idxs)
|
||||
return data_set
|
||||
|
||||
|
||||
def get_squad_data_filter(config):
|
||||
def data_filter(data_point, shared):
|
||||
assert shared is not None
|
||||
rx, rcx, q, cq, y = (data_point[key] for key in ('*x', '*cx', 'q', 'cq', 'y'))
|
||||
x, cx = shared['x'], shared['cx']
|
||||
if len(q) > config.ques_size_th:
|
||||
return False
|
||||
|
||||
# x filter
|
||||
xi = x[rx[0]][rx[1]]
|
||||
if config.squash:
|
||||
for start, stop in y:
|
||||
stop_offset = sum(map(len, xi[:stop[0]]))
|
||||
if stop_offset + stop[1] > config.para_size_th:
|
||||
return False
|
||||
return True
|
||||
|
||||
if config.single:
|
||||
for start, stop in y:
|
||||
if start[0] != stop[0]:
|
||||
return False
|
||||
|
||||
if config.data_filter == 'max':
|
||||
for start, stop in y:
|
||||
if stop[0] >= config.num_sents_th:
|
||||
return False
|
||||
if start[0] != stop[0]:
|
||||
return False
|
||||
if stop[1] >= config.sent_size_th:
|
||||
return False
|
||||
elif config.data_filter == 'valid':
|
||||
if len(xi) > config.num_sents_th:
|
||||
return False
|
||||
if any(len(xij) > config.sent_size_th for xij in xi):
|
||||
return False
|
||||
elif config.data_filter == 'semi':
|
||||
"""
|
||||
Only answer sentence needs to be valid.
|
||||
"""
|
||||
for start, stop in y:
|
||||
if stop[0] >= config.num_sents_th:
|
||||
return False
|
||||
if start[0] != stop[0]:
|
||||
return False
|
||||
if len(xi[start[0]]) > config.sent_size_th:
|
||||
return False
|
||||
else:
|
||||
raise Exception()
|
||||
|
||||
return True
|
||||
return data_filter
|
||||
|
||||
|
||||
def update_config(config, data_sets):
|
||||
config.max_num_sents = 0
|
||||
config.max_sent_size = 0
|
||||
config.max_ques_size = 0
|
||||
config.max_word_size = 0
|
||||
config.max_para_size = 0
|
||||
for data_set in data_sets:
|
||||
data = data_set.data
|
||||
shared = data_set.shared
|
||||
for idx in data_set.valid_idxs:
|
||||
rx = data['*x'][idx]
|
||||
q = data['q'][idx]
|
||||
sents = shared['x'][rx[0]][rx[1]]
|
||||
config.max_para_size = max(config.max_para_size, sum(map(len, sents)))
|
||||
config.max_num_sents = max(config.max_num_sents, len(sents))
|
||||
config.max_sent_size = max(config.max_sent_size, max(map(len, sents)))
|
||||
config.max_word_size = max(config.max_word_size, max(len(word) for sent in sents for word in sent))
|
||||
if len(q) > 0:
|
||||
config.max_ques_size = max(config.max_ques_size, len(q))
|
||||
config.max_word_size = max(config.max_word_size, max(len(word) for word in q))
|
||||
|
||||
if config.mode == 'train':
|
||||
config.max_num_sents = min(config.max_num_sents, config.num_sents_th)
|
||||
config.max_sent_size = min(config.max_sent_size, config.sent_size_th)
|
||||
config.max_para_size = min(config.max_para_size, config.para_size_th)
|
||||
|
||||
config.max_word_size = min(config.max_word_size, config.word_size_th)
|
||||
|
||||
config.char_vocab_size = len(data_sets[0].shared['char2idx'])
|
||||
config.word_emb_size = len(next(iter(data_sets[0].shared['word2vec'].values())))
|
||||
config.word_vocab_size = len(data_sets[0].shared['word2idx'])
|
||||
|
||||
if config.single:
|
||||
config.max_num_sents = 1
|
||||
if config.squash:
|
||||
config.max_sent_size = config.max_para_size
|
||||
config.max_num_sents = 1
|
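A note on how the batching above feeds the multi-GPU trainer (a reading of the code, not extra functionality):
# get_multi_batches(batch_size, num_gpus, ...) draws batch_size * num_gpus
# example indices per step via get_batches, then splits them with
# grouper(..., num_groups=num_gpus) and DataSet.divide(num_gpus), so
# MultiGPUTrainer.step (in basic/trainer.py below) receives one
# (idxs, DataSet) pair per GPU tower.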
29
tensorflow/SQuAD/basic/run_ensemble.sh
Executable file
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env bash
|
||||
source_path=$1
|
||||
target_path=$2
|
||||
inter_dir="inter_ensemble"
|
||||
root_dir="save"
|
||||
|
||||
parg=""
|
||||
marg=""
|
||||
if [ "$3" = "debug" ]
|
||||
then
|
||||
parg="-d"
|
||||
marg="--debug"
|
||||
fi
|
||||
|
||||
# Preprocess data
|
||||
python3 -m squad.prepro --mode single --single_path $source_path $parg --target_dir $inter_dir --glove_dir .
|
||||
|
||||
eargs=""
|
||||
for num in 31 33 34 35 36 37 40 41 43 44 45 46; do
|
||||
load_path="$root_dir/$num/save"
|
||||
shared_path="$root_dir/$num/shared.json"
|
||||
eval_path="$inter_dir/eval-$num.pklz"
|
||||
eargs="$eargs $eval_path"
|
||||
python3 -m basic.cli --data_dir $inter_dir --eval_path $eval_path --nodump_answer --load_path $load_path --shared_path $shared_path $marg --eval_num_batches 0 --mode forward --batch_size 1 --len_opt --cluster --cpu_opt --load_ema &
|
||||
done
|
||||
wait
|
||||
|
||||
# Ensemble
|
||||
python3 -m basic.ensemble --data_path $inter_dir/data_single.json --shared_path $inter_dir/shared_single.json -o $target_path $eargs
|
27
tensorflow/SQuAD/basic/run_single.sh
Executable file
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/env bash
|
||||
source_path=$1
|
||||
target_path=$2
|
||||
inter_dir="inter_single"
|
||||
root_dir="save"
|
||||
|
||||
parg=""
|
||||
marg=""
|
||||
if [ "$3" = "debug" ]
|
||||
then
|
||||
parg="-d"
|
||||
marg="--debug"
|
||||
fi
|
||||
|
||||
# Preprocess data
|
||||
python3 -m squad.prepro --mode single --single_path $source_path $parg --target_dir $inter_dir --glove_dir .
|
||||
|
||||
num=37
|
||||
load_path="$root_dir/$num/save"
|
||||
shared_path="$root_dir/$num/shared.json"
|
||||
eval_path="$inter_dir/eval.pklz"
|
||||
python3 -m basic.cli --data_dir $inter_dir --eval_path $eval_path --nodump_answer --load_path $load_path --shared_path $shared_path $marg --eval_num_batches 0 --mode forward --batch_size 1 --len_opt --cluster --cpu_opt --load_ema
|
||||
|
||||
# Ensemble (for single run, just one input)
|
||||
python3 -m basic.ensemble --data_path $inter_dir/data_single.json --shared_path $inter_dir/shared_single.json -o $target_path $eval_path
|
||||
|
||||
|
76
tensorflow/SQuAD/basic/templates/visualizer.html
Normal file
|
@ -0,0 +1,76 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{{ title }}</title>
|
||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/chroma-js/1.1.1/chroma.min.js"></script>
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$(".att").each(function() {
|
||||
// var val = parseFloat($(this).text());
|
||||
var val = parseFloat($(this).attr("color"));
|
||||
var scale = chroma.scale(['white', 'red']);
|
||||
var color = scale(val).hex();
|
||||
$(this).attr("bgcolor", color);
|
||||
});
|
||||
})
|
||||
</script>
|
||||
</head>
|
||||
<style>
|
||||
table, th, td {border: 1px solid black}
|
||||
</style>
|
||||
<body>
|
||||
<h2>{{ title }}</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Question</th>
|
||||
<th>Answers</th>
|
||||
<th>Predicted</th>
|
||||
<th>Score</th>
|
||||
<th>Paragraph</th>
|
||||
</tr>
|
||||
{% for row in rows %}
|
||||
<tr>
|
||||
<td>{{ row.id }}</td>
|
||||
<td>
|
||||
{% for qj in row.ques %}
|
||||
{{ qj }}
|
||||
{% endfor %}
|
||||
</td>
|
||||
<td>
|
||||
{% for aa in row.a %}
|
||||
<li>{{ aa }}</li>
|
||||
{% endfor %}
|
||||
</td>
|
||||
<td>{{ row.ap }}</td>
|
||||
<td>{{ row.score }}</td>
|
||||
<td>
|
||||
<table>
|
||||
{% for xj, ypj, yp2j in zip(row.para, row.yp, row.yp2) %}
|
||||
<tr>
|
||||
{% set rowloop = loop %}
|
||||
{% for xjk, ypjk in zip(xj, ypj) %}
|
||||
<td class="att" color="{{ ypjk }}">
|
||||
{% if row.y[0][0] == rowloop.index0 and row.y[0][1] <= loop.index0 <= row.y[1][1] %}
|
||||
<b>{{ xjk }}</b>
|
||||
{% else %}
|
||||
{{ xjk }}
|
||||
{% endif %}
|
||||
</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
<tr>
|
||||
{% for xjk, yp2jk in zip(xj, yp2j) %}
|
||||
<td class="att" color="{{ yp2jk }}">-</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
73
tensorflow/SQuAD/basic/trainer.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
import tensorflow as tf
|
||||
|
||||
from basic.model import Model
|
||||
from my.tensorflow import average_gradients
|
||||
|
||||
|
||||
class Trainer(object):
|
||||
def __init__(self, config, model):
|
||||
assert isinstance(model, Model)
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.opt = tf.train.AdamOptimizer(config.init_lr)
|
||||
self.loss = model.get_loss()
|
||||
self.var_list = model.get_var_list()
|
||||
self.global_step = model.get_global_step()
|
||||
self.summary = model.summary
|
||||
self.grads = self.opt.compute_gradients(self.loss, var_list=self.var_list)
|
||||
self.train_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)
|
||||
|
||||
def get_train_op(self):
|
||||
return self.train_op
|
||||
|
||||
def step(self, sess, batch, get_summary=False):
|
||||
assert isinstance(sess, tf.Session)
|
||||
_, ds = batch
|
||||
feed_dict = self.model.get_feed_dict(ds, True)
|
||||
if get_summary:
|
||||
loss, summary, train_op = \
|
||||
sess.run([self.loss, self.summary, self.train_op], feed_dict=feed_dict)
|
||||
else:
|
||||
loss, train_op = sess.run([self.loss, self.train_op], feed_dict=feed_dict)
|
||||
summary = None
|
||||
return loss, summary, train_op
|
||||
|
||||
|
||||
class MultiGPUTrainer(object):
|
||||
def __init__(self, config, models):
|
||||
model = models[0]
|
||||
assert isinstance(model, Model)
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.opt = tf.train.AdamOptimizer(config.init_lr)
|
||||
self.var_list = model.get_var_list()
|
||||
self.global_step = model.get_global_step()
|
||||
self.summary = model.summary
|
||||
self.models = models
|
||||
losses = []
|
||||
grads_list = []
|
||||
for gpu_idx, model in enumerate(models):
|
||||
with tf.name_scope("grads_{}".format(gpu_idx)), tf.device("/{}:{}".format(config.device_type, gpu_idx)):
|
||||
loss = model.get_loss()
|
||||
grads = self.opt.compute_gradients(loss, var_list=self.var_list)
|
||||
losses.append(loss)
|
||||
grads_list.append(grads)
|
||||
|
||||
self.loss = tf.add_n(losses) / len(losses)
|
||||
self.grads = average_gradients(grads_list)
|
||||
self.train_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)
|
||||
|
||||
def step(self, sess, batches, get_summary=False):
|
||||
assert isinstance(sess, tf.Session)
|
||||
feed_dict = {}
|
||||
for batch, model in zip(batches, self.models):
|
||||
_, ds = batch
|
||||
feed_dict.update(model.get_feed_dict(ds, True))
|
||||
|
||||
if get_summary:
|
||||
loss, summary, train_op = \
|
||||
sess.run([self.loss, self.summary, self.train_op], feed_dict=feed_dict)
|
||||
else:
|
||||
loss, train_op = sess.run([self.loss, self.train_op], feed_dict=feed_dict)
|
||||
summary = None
|
||||
return loss, summary, train_op
|
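MultiGPUTrainer above averages both the per-tower losses and their gradients; average_gradients comes from my.tensorflow, which is not part of this excerpt, so the NumPy sketch below is only an assumption about the usual tower-averaging behaviour (one (grad, var) list per GPU, averaged element-wise per shared variable):

import numpy as np

def average_gradients(tower_grads):
    # Hypothetical NumPy stand-in: tower_grads is a list of [(grad, var), ...] lists,
    # one per tower, all referring to the same variables in the same order.
    averaged = []
    for grad_and_vars in zip(*tower_grads):      # one tuple of (grad, var) pairs per variable
        grads = [g for g, _ in grad_and_vars]
        var = grad_and_vars[0][1]                # same variable on every tower
        averaged.append((np.mean(grads, axis=0), var))
    return averaged

# Two towers, one shared variable "w"
tower_a = [(np.array([1.0, 2.0]), "w")]
tower_b = [(np.array([3.0, 4.0]), "w")]
print(average_gradients([tower_a, tower_b]))  # [(array([2., 3.]), 'w')]
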
140
tensorflow/SQuAD/basic/visualizer.py
Normal file
|
@@ -0,0 +1,140 @@
|
|||
import shutil
|
||||
from collections import OrderedDict
|
||||
import http.server
|
||||
import socketserver
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import pickle
|
||||
import gzip
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
from squad.utils import get_best_span, get_best_span_wy
|
||||
|
||||
|
||||
def bool_(string):
|
||||
if string == 'True':
|
||||
return True
|
||||
elif string == 'False':
|
||||
return False
|
||||
else:
|
||||
raise ValueError("expected 'True' or 'False', got {!r}".format(string))
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model_name", type=str, default='basic')
|
||||
parser.add_argument("--data_type", type=str, default='dev')
|
||||
parser.add_argument("--step", type=int, default=5000)
|
||||
parser.add_argument("--template_name", type=str, default="visualizer.html")
|
||||
parser.add_argument("--num_per_page", type=int, default=100)
|
||||
parser.add_argument("--data_dir", type=str, default="data/squad")
|
||||
parser.add_argument("--port", type=int, default=8000)
|
||||
parser.add_argument("--host", type=str, default="0.0.0.0")
|
||||
parser.add_argument("--open", type=str, default='False')
|
||||
parser.add_argument("--run_id", type=str, default="0")
|
||||
parser.add_argument("-w", "--wy", action='store_true')
|
||||
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def _decode(decoder, sent):
|
||||
return " ".join(decoder[idx] for idx in sent)
|
||||
|
||||
|
||||
def accuracy2_visualizer(args):
|
||||
model_name = args.model_name
|
||||
data_type = args.data_type
|
||||
num_per_page = args.num_per_page
|
||||
data_dir = args.data_dir
|
||||
run_id = args.run_id.zfill(2)
|
||||
step = args.step
|
||||
|
||||
eval_path = os.path.join("out", model_name, run_id, "eval", "{}-{}.pklz".format(data_type, str(step).zfill(6)))
|
||||
print("loading {}".format(eval_path))
|
||||
eval_ = pickle.load(gzip.open(eval_path, 'r'))
|
||||
|
||||
_id = 0
|
||||
html_dir = "/tmp/list_results%d" % _id
|
||||
while os.path.exists(html_dir):
|
||||
_id += 1
|
||||
html_dir = "/tmp/list_results%d" % _id
|
||||
|
||||
if os.path.exists(html_dir):
|
||||
shutil.rmtree(html_dir)
|
||||
os.mkdir(html_dir)
|
||||
|
||||
cur_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
templates_dir = os.path.join(cur_dir, 'templates')
|
||||
env = Environment(loader=FileSystemLoader(templates_dir))
|
||||
env.globals.update(zip=zip, reversed=reversed)
|
||||
template = env.get_template(args.template_name)
|
||||
|
||||
data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
|
||||
shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
|
||||
print("loading {}".format(data_path))
|
||||
data = json.load(open(data_path, 'r'))
|
||||
print("loading {}".format(shared_path))
|
||||
shared = json.load(open(shared_path, 'r'))
|
||||
|
||||
rows = []
|
||||
for i, (idx, yi, ypi, yp2i, wypi) in tqdm(enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2', 'wyp')])), total=len(eval_['idxs'])):
|
||||
id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
|
||||
x = shared['x'][rx[0]][rx[1]]
|
||||
ques = [" ".join(q)]
|
||||
para = [[word for word in sent] for sent in x]
|
||||
span, score = get_best_span_wy(wypi, 0.5) if args.wy else get_best_span(ypi, yp2i)
|
||||
ap = get_segment(para, span)
|
||||
# score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])
|
||||
|
||||
row = {
|
||||
'id': id_,
|
||||
'title': "Hello world!",
|
||||
'ques': ques,
|
||||
'para': para,
|
||||
'y': yi[0],  # full (start, stop) span; the template above indexes row.y[0][0], row.y[0][1] and row.y[1][1]
|
||||
'y2': yi[0][1],
|
||||
'yp': wypi if args.wy else ypi,
|
||||
'yp2': wypi if args.wy else yp2i,
|
||||
'a': answers,
|
||||
'ap': ap,
|
||||
'score': score
|
||||
}
|
||||
rows.append(row)
|
||||
|
||||
if i % num_per_page == 0:
|
||||
html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))
|
||||
|
||||
if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
|
||||
var_dict = {'title': "Accuracy Visualization",
|
||||
'rows': rows
|
||||
}
|
||||
with open(html_path, "wb") as f:
|
||||
f.write(template.render(**var_dict).encode('UTF-8'))
|
||||
rows = []
|
||||
|
||||
os.chdir(html_dir)
|
||||
port = args.port
|
||||
host = args.host
|
||||
# Overriding to suppress log message
|
||||
class MyHandler(http.server.SimpleHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
handler = MyHandler
|
||||
httpd = socketserver.TCPServer((host, port), handler)
|
||||
if args.open == 'True':
|
||||
os.system("open http://%s:%d" % (args.host, args.port))
|
||||
print("serving at %s:%d" % (host, port))
|
||||
httpd.serve_forever()
|
||||
|
||||
|
||||
def get_segment(para, span):
|
||||
return " ".join(para[span[0][0]][span[0][1]:span[1][1]])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ARGS = get_args()
|
||||
accuracy2_visualizer(ARGS)
|
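Note that get_segment above slices only within the start sentence of the span, which assumes the predicted span never crosses a sentence boundary (consistent with how the span search builds spans). A tiny stand-alone check of that (sent_idx, word_idx) convention, with an exclusive stop word index:

para = [["The", "cat", "sat"], ["It", "was", "warm"]]
span = ((1, 0), (1, 2))  # ((start_sent, start_word), (stop_sent, stop_word)), stop_word exclusive
print(" ".join(para[span[0][0]][span[0][1]:span[1][1]]))  # "It was"
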
0
tensorflow/SQuAD/basic_cnn/__init__.py
Normal file
103
tensorflow/SQuAD/basic_cnn/cli.py
Normal file
|
@@ -0,0 +1,103 @@
|
|||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from basic_cnn.main import main as m
|
||||
|
||||
flags = tf.app.flags
|
||||
|
||||
flags.DEFINE_string("model_name", "basic_cnn", "Model name [basic]")
|
||||
flags.DEFINE_string("data_dir", "data/cnn", "Data dir [data/cnn]")
|
||||
flags.DEFINE_string("root_dir", "/Users/minjoons/data/cnn/questions", "root dir [~/data/cnn/questions]")
|
||||
flags.DEFINE_string("run_id", "0", "Run ID [0]")
|
||||
flags.DEFINE_string("out_base_dir", "out", "out base dir [out]")
|
||||
|
||||
flags.DEFINE_integer("batch_size", 60, "Batch size [60]")
|
||||
flags.DEFINE_float("init_lr", 0.5, "Initial learning rate [0.5]")
|
||||
flags.DEFINE_integer("num_epochs", 50, "Total number of epochs for training [50]")
|
||||
flags.DEFINE_integer("num_steps", 20000, "Number of steps [20000]")
|
||||
flags.DEFINE_integer("eval_num_batches", 100, "eval num batches [100]")
|
||||
flags.DEFINE_integer("load_step", 0, "load step [0]")
|
||||
flags.DEFINE_integer("early_stop", 4, "early stop [4]")
|
||||
|
||||
flags.DEFINE_string("mode", "test", "train | dev | test | forward [test]")
|
||||
flags.DEFINE_boolean("load", True, "load saved data? [True]")
|
||||
flags.DEFINE_boolean("progress", True, "Show progress? [True]")
|
||||
flags.DEFINE_integer("log_period", 100, "Log period [100]")
|
||||
flags.DEFINE_integer("eval_period", 1000, "Eval period [1000]")
|
||||
flags.DEFINE_integer("save_period", 1000, "Save Period [1000]")
|
||||
flags.DEFINE_float("decay", 0.9, "Exponential moving average decay [0.9]")
|
||||
|
||||
flags.DEFINE_boolean("draft", False, "Draft for quick testing? [False]")
|
||||
|
||||
flags.DEFINE_integer("hidden_size", 100, "Hidden size [100]")
|
||||
flags.DEFINE_integer("char_out_size", 100, "Char out size [100]")
|
||||
flags.DEFINE_float("input_keep_prob", 0.8, "Input keep prob [0.8]")
|
||||
flags.DEFINE_integer("char_emb_size", 8, "Char emb size [8]")
|
||||
flags.DEFINE_integer("char_filter_height", 5, "Char filter height [5]")
|
||||
flags.DEFINE_float("wd", 0.0, "Weight decay [0.0]")
|
||||
flags.DEFINE_bool("lower_word", True, "lower word [True]")
|
||||
flags.DEFINE_bool("dump_eval", False, "dump eval? [True]")
|
||||
flags.DEFINE_bool("dump_answer", True, "dump answer? [True]")
|
||||
flags.DEFINE_string("model", "2", "config 1 |2 [2]")
|
||||
flags.DEFINE_bool("squash", False, "squash the sentences into one? [False]")
|
||||
flags.DEFINE_bool("single", False, "supervise only the answer sentence? [False]")
|
||||
|
||||
flags.DEFINE_integer("word_count_th", 10, "word count th [100]")
|
||||
flags.DEFINE_integer("char_count_th", 50, "char count th [500]")
|
||||
flags.DEFINE_integer("sent_size_th", 60, "sent size th [64]")
|
||||
flags.DEFINE_integer("num_sents_th", 200, "num sents th [8]")
|
||||
flags.DEFINE_integer("ques_size_th", 30, "ques size th [32]")
|
||||
flags.DEFINE_integer("word_size_th", 16, "word size th [16]")
|
||||
flags.DEFINE_integer("para_size_th", 256, "para size th [256]")
|
||||
|
||||
flags.DEFINE_bool("swap_memory", True, "swap memory? [True]")
|
||||
flags.DEFINE_string("data_filter", "max", "max | valid | semi [max]")
|
||||
flags.DEFINE_bool("finetune", False, "finetune? [False]")
|
||||
flags.DEFINE_bool("feed_gt", False, "feed gt prev token during training [False]")
|
||||
flags.DEFINE_bool("feed_hard", False, "feed hard argmax prev token during testing [False]")
|
||||
flags.DEFINE_bool("use_glove_for_unk", True, "use glove for unk [False]")
|
||||
flags.DEFINE_bool("known_if_glove", True, "consider as known if present in glove [False]")
|
||||
flags.DEFINE_bool("eval", True, "eval? [True]")
|
||||
flags.DEFINE_integer("highway_num_layers", 2, "highway num layers [2]")
|
||||
flags.DEFINE_bool("use_word_emb", True, "use word embedding? [True]")
|
||||
|
||||
flags.DEFINE_string("forward_name", "single", "Forward name [single]")
|
||||
flags.DEFINE_string("answer_path", "", "Answer path []")
|
||||
flags.DEFINE_string("load_path", "", "Load path []")
|
||||
flags.DEFINE_string("shared_path", "", "Shared path []")
|
||||
flags.DEFINE_string("device", "/cpu:0", "default device [/cpu:0]")
|
||||
flags.DEFINE_integer("num_gpus", 1, "num of gpus [1]")
|
||||
|
||||
flags.DEFINE_string("out_channel_dims", "100", "Out channel dims, separated by commas [100]")
|
||||
flags.DEFINE_string("filter_heights", "5", "Filter heights, separated by commas [5]")
|
||||
|
||||
flags.DEFINE_bool("share_cnn_weights", True, "Share CNN weights [False]")
|
||||
flags.DEFINE_bool("share_lstm_weights", True, "Share LSTM weights [True]")
|
||||
flags.DEFINE_bool("two_prepro_layers", False, "Use two layers for preprocessing? [False]")
|
||||
flags.DEFINE_bool("aug_att", False, "Augment attention layers with more features? [False]")
|
||||
flags.DEFINE_integer("max_to_keep", 20, "Max recent saves to keep [20]")
|
||||
flags.DEFINE_bool("vis", False, "output visualization numbers? [False]")
|
||||
flags.DEFINE_bool("dump_pickle", True, "Dump pickle instead of json? [True]")
|
||||
flags.DEFINE_float("keep_prob", 1.0, "keep prob [1.0]")
|
||||
flags.DEFINE_string("prev_mode", "a", "prev mode gy | y | a [a]")
|
||||
flags.DEFINE_string("logit_func", "tri_linear", "logit func [tri_linear]")
|
||||
flags.DEFINE_bool("sh", False, "use superhighway [False]")
|
||||
flags.DEFINE_string("answer_func", "linear", "answer logit func [linear]")
|
||||
flags.DEFINE_bool("cluster", False, "Cluster data for faster training [False]")
|
||||
flags.DEFINE_bool("len_opt", False, "Length optimization? [False]")
|
||||
flags.DEFINE_string("sh_logit_func", "tri_linear", "sh logit func [tri_linear]")
|
||||
flags.DEFINE_float("filter_ratio", 1.0, "filter ratio [1.0]")
|
||||
flags.DEFINE_bool("bi", False, "bi-directional attention? [False]")
|
||||
flags.DEFINE_integer("width", 5, "width around entity [5]")
|
||||
|
||||
|
||||
def main(_):
|
||||
config = flags.FLAGS
|
||||
|
||||
config.out_dir = os.path.join(config.out_base_dir, config.model_name, str(config.run_id).zfill(2))
|
||||
|
||||
m(config)
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.app.run()
|
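main() above only composes config.out_dir from three flags before delegating to basic_cnn.main; the zero-padded run id is the same convention the graph handler and the visualizer rely on when they rebuild paths. A one-line illustration (the values are just examples):

import os

out_base_dir, model_name, run_id = "out", "basic_cnn", "0"   # example flag values
print(os.path.join(out_base_dir, model_name, str(run_id).zfill(2)))  # out/basic_cnn/00
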
494
tensorflow/SQuAD/basic_cnn/evaluator.py
Normal file
|
@@ -0,0 +1,494 @@
|
|||
import itertools
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import os
|
||||
|
||||
from basic_cnn.read_data import DataSet
|
||||
from my.nltk_utils import span_f1
|
||||
from my.tensorflow import padded_reshape
|
||||
from my.utils import argmax
|
||||
|
||||
|
||||
class Evaluation(object):
|
||||
def __init__(self, data_type, global_step, idxs, yp, tensor_dict=None):
|
||||
self.data_type = data_type
|
||||
self.global_step = global_step
|
||||
self.idxs = idxs
|
||||
self.yp = yp
|
||||
self.num_examples = len(yp)
|
||||
self.tensor_dict = None
|
||||
self.dict = {'data_type': data_type,
|
||||
'global_step': global_step,
|
||||
'yp': yp,
|
||||
'idxs': idxs,
|
||||
'num_examples': self.num_examples}
|
||||
if tensor_dict is not None:
|
||||
self.tensor_dict = {key: val.tolist() for key, val in tensor_dict.items()}
|
||||
for key, val in self.tensor_dict.items():
|
||||
self.dict[key] = val
|
||||
self.summaries = None
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}".format(self.data_type, self.global_step)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_yp = self.yp + other.yp
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_tensor_dict = None
|
||||
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: val + other.tensor_dict[key] for key, val in self.tensor_dict.items()}
|
||||
return Evaluation(self.data_type, self.global_step, new_idxs, new_yp, tensor_dict=new_tensor_dict)
|
||||
|
||||
def __radd__(self, other):
|
||||
return self.__add__(other)
|
||||
|
||||
|
||||
class LabeledEvaluation(Evaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, y, id2answer_dict, tensor_dict=None):
|
||||
super(LabeledEvaluation, self).__init__(data_type, global_step, idxs, yp, tensor_dict=tensor_dict)
|
||||
self.y = y
|
||||
self.dict['y'] = y
|
||||
self.id2answer_dict = id2answer_dict
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_yp = self.yp + other.yp
|
||||
new_y = self.y + other.y
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_id2answer_dict = dict(list(self.id2answer_dict.items()) + list(other.id2answer_dict.items()))
|
||||
new_id2score_dict = dict(list(self.id2answer_dict['scores'].items()) + list(other.id2answer_dict['scores'].items()))
|
||||
new_id2answer_dict['scores'] = new_id2score_dict
|
||||
new_tensor_dict = None  # stays None when there is no tensor_dict to merge
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: np.concatenate((val, other.tensor_dict[key]), axis=0) for key, val in self.tensor_dict.items()}
|
||||
return LabeledEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y, new_id2answer_dict, tensor_dict=new_tensor_dict)
|
||||
|
||||
|
||||
class AccuracyEvaluation(LabeledEvaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, y, id2answer_dict, correct, loss, tensor_dict=None):
|
||||
super(AccuracyEvaluation, self).__init__(data_type, global_step, idxs, yp, y, id2answer_dict, tensor_dict=tensor_dict)
|
||||
self.loss = loss
|
||||
self.correct = correct
|
||||
self.id2answer_dict = id2answer_dict
|
||||
self.acc = sum(correct) / len(correct)
|
||||
self.dict['loss'] = loss
|
||||
self.dict['correct'] = correct
|
||||
self.dict['acc'] = self.acc
|
||||
loss_summary = tf.Summary(value=[tf.Summary.Value(tag='{}/loss'.format(data_type), simple_value=self.loss)])
|
||||
acc_summary = tf.Summary(value=[tf.Summary.Value(tag='{}/acc'.format(data_type), simple_value=self.acc)])
|
||||
self.summaries = [loss_summary, acc_summary]
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: accuracy={}={}/{}, loss={}".format(self.data_type, self.global_step, self.acc,
|
||||
sum(self.correct), self.num_examples, self.loss)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_y = self.y + other.y
|
||||
new_correct = self.correct + other.correct
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
|
||||
new_id2answer_dict = dict(list(self.id2answer_dict.items()) + list(other.id2answer_dict.items()))
|
||||
new_id2score_dict = dict(list(self.id2answer_dict['scores'].items()) + list(other.id2answer_dict['scores'].items()))
|
||||
new_id2answer_dict['scores'] = new_id2score_dict
|
||||
new_tensor_dict = None
|
||||
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: np.concatenate((val, other.tensor_dict[key]), axis=0) for key, val in self.tensor_dict.items()}
|
||||
return AccuracyEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y, new_id2answer_dict, new_correct, new_loss, tensor_dict=new_tensor_dict)
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.global_step = model.global_step
|
||||
self.yp = model.yp
|
||||
self.tensor_dict = {} if tensor_dict is None else tensor_dict
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
|
||||
global_step, yp, vals = sess.run([self.global_step, self.yp, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
def get_evaluation_from_batches(self, sess, batches):
|
||||
e = sum(self.get_evaluation(sess, batch) for batch in batches)
|
||||
return e
|
||||
|
||||
|
||||
class LabeledEvaluator(Evaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(LabeledEvaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.y = model.y
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
|
||||
global_step, yp, vals = sess.run([self.global_step, self.yp, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
y = feed_dict[self.y]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = LabeledEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y.tolist(), tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
|
||||
class AccuracyEvaluator(LabeledEvaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(AccuracyEvaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.loss = model.loss
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = self._split_batch(batch)
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self._get_feed_dict(batch)
|
||||
y = data_set.data['y']
|
||||
global_step, yp, loss, vals = sess.run([self.global_step, self.yp, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
correct, probs, preds = zip(*[self.__class__.compare(data_set.get_one(idx), ypi) for idx, ypi in zip(data_set.valid_idxs, yp)])
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
ids = data_set.data['ids']
|
||||
id2score_dict = {id_: prob for id_, prob in zip(ids, probs)}
|
||||
id2answer_dict = {id_: pred for id_, pred in zip(ids, preds)}
|
||||
id2answer_dict['scores'] = id2score_dict
|
||||
e = AccuracyEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y, id2answer_dict, correct, float(loss), tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
@staticmethod
|
||||
def compare(data, ypi):
|
||||
prob = float(np.max(ypi))
|
||||
yi = data['y']
|
||||
for start, stop in yi:
|
||||
if start == int(np.argmax(ypi)):
|
||||
return True, prob, " "
|
||||
return False, prob, " "
|
||||
|
||||
def _split_batch(self, batch):
|
||||
return batch
|
||||
|
||||
def _get_feed_dict(self, batch):
|
||||
return self.model.get_feed_dict(batch[1], False)
|
||||
|
||||
|
||||
class CNNAccuracyEvaluator(AccuracyEvaluator):
|
||||
@staticmethod
|
||||
def compare(data, ypi):
|
||||
# ypi: [N, M, JX] numbers
|
||||
yi = data['y'][0] # entity
|
||||
xi = data['x'][0] # [N, M, JX] words
|
||||
dist = defaultdict(int)
|
||||
for ypij, xij in zip(ypi, xi):
|
||||
for ypijk, xijk in zip(ypij, xij):
|
||||
if xijk.startswith("@"):
|
||||
dist[xijk] += ypijk
|
||||
pred, prob = max(dist.items(), key=lambda item: item[1])
|
||||
assert pred.startswith("@")
|
||||
assert yi.startswith("@")
|
||||
return pred == yi, prob, pred
|
||||
|
||||
|
||||
class AccuracyEvaluator2(AccuracyEvaluator):
|
||||
@staticmethod
|
||||
def compare(yi, ypi):
|
||||
for start, stop in yi:
|
||||
para_start = int(np.argmax(np.max(ypi, 1)))
|
||||
sent_start = int(np.argmax(ypi[para_start]))
|
||||
if tuple(start) == (para_start, sent_start):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class ForwardEvaluation(Evaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, yp2, loss, id2answer_dict, tensor_dict=None):
|
||||
super(ForwardEvaluation, self).__init__(data_type, global_step, idxs, yp, tensor_dict=tensor_dict)
|
||||
self.yp2 = yp2
|
||||
self.loss = loss
|
||||
self.dict['loss'] = loss
|
||||
self.dict['yp2'] = yp2
|
||||
self.id2answer_dict = id2answer_dict
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_yp2 = self.yp2 + other.yp2
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_yp)
|
||||
new_id2answer_dict = dict(list(self.id2answer_dict.items()) + list(other.id2answer_dict.items()))
|
||||
new_tensor_dict = None  # stays None when there is no tensor_dict to merge
if self.tensor_dict is not None:
|
||||
new_tensor_dict = {key: np.concatenate((val, other.tensor_dict[key]), axis=0) for key, val in self.tensor_dict.items()}
|
||||
return ForwardEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_yp2, new_loss, new_id2answer_dict, tensor_dict=new_tensor_dict)
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: loss={:.4f}".format(self.data_type, self.global_step, self.loss)
|
||||
|
||||
|
||||
class F1Evaluation(AccuracyEvaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, yp2, y, correct, loss, f1s, id2answer_dict, tensor_dict=None):
|
||||
super(F1Evaluation, self).__init__(data_type, global_step, idxs, yp, y, id2answer_dict, correct, loss, tensor_dict=tensor_dict)
|
||||
self.yp2 = yp2
|
||||
self.f1s = f1s
|
||||
self.f1 = float(np.mean(f1s))
|
||||
self.dict['yp2'] = yp2
|
||||
self.dict['f1s'] = f1s
|
||||
self.dict['f1'] = self.f1
|
||||
self.id2answer_dict = id2answer_dict
|
||||
f1_summary = tf.Summary(value=[tf.Summary.Value(tag='{}/f1'.format(data_type), simple_value=self.f1)])
|
||||
self.summaries.append(f1_summary)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_yp2 = self.yp2 + other.yp2
|
||||
new_y = self.y + other.y
|
||||
new_correct = self.correct + other.correct
|
||||
new_f1s = self.f1s + other.f1s
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
|
||||
new_id2answer_dict = dict(list(self.id2answer_dict.items()) + list(other.id2answer_dict.items()))
|
||||
return F1Evaluation(self.data_type, self.global_step, new_idxs, new_yp, new_yp2, new_y, new_correct, new_loss, new_f1s, new_id2answer_dict)
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: accuracy={:.4f}, f1={:.4f}, loss={:.4f}".format(self.data_type, self.global_step, self.acc, self.f1, self.loss)
|
||||
|
||||
|
||||
class F1Evaluator(LabeledEvaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(F1Evaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.yp2 = model.yp2
|
||||
self.loss = model.loss
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = self._split_batch(batch)
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self._get_feed_dict(batch)
|
||||
global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
y = data_set.data['y']
|
||||
if self.config.squash:
|
||||
new_y = []
|
||||
for xi, yi in zip(data_set.data['x'], y):
|
||||
new_yi = []
|
||||
for start, stop in yi:
|
||||
start_offset = sum(map(len, xi[:start[0]]))
|
||||
stop_offset = sum(map(len, xi[:stop[0]]))
|
||||
new_start = 0, start_offset + start[1]
|
||||
new_stop = 0, stop_offset + stop[1]
|
||||
new_yi.append((new_start, new_stop))
|
||||
new_y.append(new_yi)
|
||||
y = new_y
|
||||
if self.config.single:
|
||||
new_y = []
|
||||
for yi in y:
|
||||
new_yi = []
|
||||
for start, stop in yi:
|
||||
new_start = 0, start[1]
|
||||
new_stop = 0, stop[1]
|
||||
new_yi.append((new_start, new_stop))
|
||||
new_y.append(new_yi)
|
||||
y = new_y
|
||||
|
||||
yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
|
||||
spans = [get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)]
|
||||
|
||||
def _get(xi, span):
|
||||
if len(xi) <= span[0][0]:
|
||||
return [""]
|
||||
if len(xi[span[0][0]]) <= span[1][1]:
|
||||
return [""]
|
||||
return xi[span[0][0]][span[0][1]:span[1][1]]
|
||||
|
||||
id2answer_dict = {id_: " ".join(_get(xi, span))
|
||||
for id_, xi, span in zip(data_set.data['ids'], data_set.data['x'], spans)}
|
||||
correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
|
||||
f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y,
|
||||
correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
def _split_batch(self, batch):
|
||||
return batch
|
||||
|
||||
def _get_feed_dict(self, batch):
|
||||
return self.model.get_feed_dict(batch[1], False)
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, ypi, yp2i):
|
||||
for start, stop in yi:
|
||||
aypi = argmax(ypi)
|
||||
mask = np.zeros(yp2i.shape)
|
||||
mask[aypi[0], aypi[1]:] = np.ones([yp2i.shape[1] - aypi[1]])
|
||||
if tuple(start) == aypi and (stop[0], stop[1]-1) == argmax(yp2i * mask):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def compare2(yi, span):
|
||||
for start, stop in yi:
|
||||
if tuple(start) == span[0] and tuple(stop) == span[1]:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def span_f1(yi, span):
|
||||
max_f1 = 0
|
||||
for start, stop in yi:
|
||||
if start[0] == span[0][0]:
|
||||
true_span = start[1], stop[1]
|
||||
pred_span = span[0][1], span[1][1]
|
||||
f1 = span_f1(true_span, pred_span)
|
||||
max_f1 = max(f1, max_f1)
|
||||
return max_f1
|
||||
|
||||
|
||||
class MultiGPUF1Evaluator(F1Evaluator):
|
||||
def __init__(self, config, models, tensor_dict=None):
|
||||
super(MultiGPUF1Evaluator, self).__init__(config, models[0], tensor_dict=tensor_dict)
|
||||
self.models = models
|
||||
with tf.name_scope("eval_concat"):
|
||||
N, M, JX = config.batch_size, config.max_num_sents, config.max_sent_size
|
||||
self.yp = tf.concat(axis=0, values=[padded_reshape(model.yp, [N, M, JX]) for model in models])
|
||||
self.yp2 = tf.concat(axis=0, values=[padded_reshape(model.yp2, [N, M, JX]) for model in models])
|
||||
self.loss = tf.add_n([model.loss for model in models])/len(models)
|
||||
|
||||
def _split_batch(self, batches):
|
||||
idxs_list, data_sets = zip(*batches)
|
||||
idxs = sum(idxs_list, ())
|
||||
data_set = sum(data_sets, data_sets[0].get_empty())
|
||||
return idxs, data_set
|
||||
|
||||
def _get_feed_dict(self, batches):
|
||||
feed_dict = {}
|
||||
for model, (_, data_set) in zip(self.models, batches):
|
||||
feed_dict.update(model.get_feed_dict(data_set, False))
|
||||
return feed_dict
|
||||
|
||||
|
||||
class MultiGPUCNNAccuracyEvaluator(CNNAccuracyEvaluator):
|
||||
def __init__(self, config, models, tensor_dict=None):
|
||||
super(MultiGPUCNNAccuracyEvaluator, self).__init__(config, models[0], tensor_dict=tensor_dict)
|
||||
self.models = models
|
||||
with tf.name_scope("eval_concat"):
|
||||
N, M, JX = config.batch_size, config.max_num_sents, config.max_sent_size
|
||||
self.yp = tf.concat(axis=0, values=[padded_reshape(model.yp, [N, M, JX]) for model in models])
|
||||
self.loss = tf.add_n([model.loss for model in models])/len(models)
|
||||
|
||||
def _split_batch(self, batches):
|
||||
idxs_list, data_sets = zip(*batches)
|
||||
idxs = sum(idxs_list, ())
|
||||
data_set = sum(data_sets, data_sets[0].get_empty())
|
||||
return idxs, data_set
|
||||
|
||||
def _get_feed_dict(self, batches):
|
||||
feed_dict = {}
|
||||
for model, (_, data_set) in zip(self.models, batches):
|
||||
feed_dict.update(model.get_feed_dict(data_set, False))
|
||||
return feed_dict
|
||||
|
||||
|
||||
class ForwardEvaluator(Evaluator):
|
||||
def __init__(self, config, model, tensor_dict=None):
|
||||
super(ForwardEvaluator, self).__init__(config, model, tensor_dict=tensor_dict)
|
||||
self.yp2 = model.yp2
|
||||
self.loss = model.loss
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self.model.get_feed_dict(data_set, False)
|
||||
global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
|
||||
|
||||
yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
|
||||
spans = [get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)]
|
||||
|
||||
def _get(xi, span):
|
||||
if len(xi) <= span[0][0]:
|
||||
return [""]
|
||||
if len(xi[span[0][0]]) <= span[1][1]:
|
||||
return [""]
|
||||
return xi[span[0][0]][span[0][1]:span[1][1]]
|
||||
|
||||
id2answer_dict = {id_: " ".join(_get(xi, span))
|
||||
for id_, xi, span in zip(data_set.data['ids'], data_set.data['x'], spans)}
|
||||
tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
|
||||
e = ForwardEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), float(loss), id2answer_dict, tensor_dict=tensor_dict)
|
||||
return e
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, ypi, yp2i):
|
||||
for start, stop in yi:
|
||||
aypi = argmax(ypi)
|
||||
mask = np.zeros(yp2i.shape)
|
||||
mask[aypi[0], aypi[1]:] = np.ones([yp2i.shape[1] - aypi[1]])
|
||||
if tuple(start) == aypi and (stop[0], stop[1]-1) == argmax(yp2i * mask):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def compare2(yi, span):
|
||||
for start, stop in yi:
|
||||
if tuple(start) == span[0] and tuple(stop) == span[1]:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def span_f1(yi, span):
|
||||
max_f1 = 0
|
||||
for start, stop in yi:
|
||||
if start[0] == span[0][0]:
|
||||
true_span = start[1], stop[1]
|
||||
pred_span = span[0][1], span[1][1]
|
||||
f1 = span_f1(true_span, pred_span)
|
||||
max_f1 = max(f1, max_f1)
|
||||
return max_f1
|
||||
|
||||
|
||||
def get_best_span(ypi, yp2i):
|
||||
|
||||
max_val = 0
|
||||
best_word_span = (0, 1)
|
||||
best_sent_idx = 0
|
||||
for f, (ypif, yp2if) in enumerate(zip(ypi, yp2i)):
|
||||
argmax_j1 = 0
|
||||
for j in range(len(ypif)):
|
||||
val1 = ypif[argmax_j1]
|
||||
if val1 < ypif[j]:
|
||||
val1 = ypif[j]
|
||||
argmax_j1 = j
|
||||
|
||||
val2 = yp2if[j]
|
||||
if val1 * val2 > max_val:
|
||||
best_word_span = (argmax_j1, j)
|
||||
best_sent_idx = f
|
||||
max_val = val1 * val2
|
||||
return (best_sent_idx, best_word_span[0]), (best_sent_idx, best_word_span[1] + 1)
|
||||
|
||||
|
||||
def get_span_score_pairs(ypi, yp2i):
|
||||
span_score_pairs = []
|
||||
for f, (ypif, yp2if) in enumerate(zip(ypi, yp2i)):
|
||||
for j in range(len(ypif)):
|
||||
for k in range(j, len(yp2if)):
|
||||
span = ((f, j), (f, k+1))
|
||||
score = ypif[j] * yp2if[k]
|
||||
span_score_pairs.append((span, score))
|
||||
return span_score_pairs
|
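get_best_span above searches, sentence by sentence, for the (start, stop) pair that maximises P(start) * P(stop) with start <= stop, and returns an exclusive stop index. A stand-alone toy run of the same search, re-implemented here so the snippet runs without the rest of the package:

def best_span(ypi, yp2i):
    # Mirror of get_best_span: per sentence, keep the best start seen so far and
    # pair it with each candidate stop, tracking the highest probability product.
    max_val, best = 0.0, ((0, 0), (0, 1))
    for f, (starts, stops) in enumerate(zip(ypi, yp2i)):
        argmax_j1 = 0
        for j in range(len(starts)):
            if starts[j] > starts[argmax_j1]:
                argmax_j1 = j
            if starts[argmax_j1] * stops[j] > max_val:
                max_val = starts[argmax_j1] * stops[j]
                best = ((f, argmax_j1), (f, j + 1))
    return best

ypi = [[0.1, 0.7, 0.2], [0.2, 0.1, 0.1]]    # P(start) per sentence, per word
yp2i = [[0.1, 0.2, 0.6], [0.1, 0.1, 0.2]]   # P(stop) per sentence, per word
print(best_span(ypi, yp2i))                  # ((0, 1), (0, 3)) -> words 1..2 of sentence 0
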
70
tensorflow/SQuAD/basic_cnn/graph_handler.py
Normal file
|
@@ -0,0 +1,70 @@
|
|||
import gzip
|
||||
import json
|
||||
from json import encoder
|
||||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from basic_cnn.evaluator import Evaluation, F1Evaluation
|
||||
from my.utils import short_floats
|
||||
|
||||
import pickle
|
||||
|
||||
|
||||
class GraphHandler(object):
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.saver = tf.train.Saver(max_to_keep=config.max_to_keep)
|
||||
self.writer = None
|
||||
self.save_path = os.path.join(config.save_dir, config.model_name)
|
||||
|
||||
def initialize(self, sess):
|
||||
if self.config.load:
|
||||
self._load(sess)
|
||||
else:
|
||||
sess.run(tf.global_variables_initializer())
|
||||
|
||||
if self.config.mode == 'train':
|
||||
self.writer = tf.summary.FileWriter(self.config.log_dir, graph=tf.get_default_graph())
|
||||
|
||||
def save(self, sess, global_step=None):
|
||||
self.saver.save(sess, self.save_path, global_step=global_step)
|
||||
|
||||
def _load(self, sess):
|
||||
config = self.config
|
||||
if config.load_path:
|
||||
save_path = config.load_path
|
||||
elif config.load_step > 0:
|
||||
save_path = os.path.join(config.save_dir, "{}-{}".format(config.model_name, config.load_step))
|
||||
else:
|
||||
save_dir = config.save_dir
|
||||
checkpoint = tf.train.get_checkpoint_state(save_dir)
|
||||
assert checkpoint is not None, "cannot load checkpoint at {}".format(save_dir)
|
||||
save_path = checkpoint.model_checkpoint_path
|
||||
print("Loading saved model from {}".format(save_path))
|
||||
self.saver.restore(sess, save_path)
|
||||
|
||||
def add_summary(self, summary, global_step):
|
||||
self.writer.add_summary(summary, global_step)
|
||||
|
||||
def add_summaries(self, summaries, global_step):
|
||||
for summary in summaries:
|
||||
self.add_summary(summary, global_step)
|
||||
|
||||
def dump_eval(self, e, precision=2, path=None):
|
||||
assert isinstance(e, Evaluation)
|
||||
if self.config.dump_pickle:
|
||||
path = path or os.path.join(self.config.eval_dir, "{}-{}.pklz".format(e.data_type, str(e.global_step).zfill(6)))
|
||||
with gzip.open(path, 'wb', compresslevel=3) as fh:
|
||||
pickle.dump(e.dict, fh)
|
||||
else:
|
||||
path = path or os.path.join(self.config.eval_dir, "{}-{}.json".format(e.data_type, str(e.global_step).zfill(6)))
|
||||
with open(path, 'w') as fh:
|
||||
json.dump(short_floats(e.dict, precision), fh)
|
||||
|
||||
def dump_answer(self, e, path=None):
|
||||
assert isinstance(e, Evaluation)
|
||||
path = path or os.path.join(self.config.answer_dir, "{}-{}.json".format(e.data_type, str(e.global_step).zfill(6)))
|
||||
with open(path, 'w') as fh:
|
||||
json.dump(e.id2answer_dict, fh)
|
||||
|
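dump_eval and dump_answer above name their files {data_type}-{global_step zero-padded to six digits}.pklz / .json, which is the same pattern basic/visualizer.py earlier in this commit reconstructs when loading an eval dump. A quick illustration (example values):

data_type, global_step = "dev", 5000   # example values
print("{}-{}.pklz".format(data_type, str(global_step).zfill(6)))  # dev-005000.pklz
print("{}-{}.json".format(data_type, str(global_step).zfill(6)))  # dev-005000.json
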
238
tensorflow/SQuAD/basic_cnn/main.py
Normal file
|
@@ -0,0 +1,238 @@
|
|||
import argparse
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
from pprint import pprint
|
||||
|
||||
import tensorflow as tf
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
|
||||
from basic_cnn.evaluator import F1Evaluator, Evaluator, ForwardEvaluator, MultiGPUF1Evaluator, CNNAccuracyEvaluator, \
|
||||
MultiGPUCNNAccuracyEvaluator
|
||||
from basic_cnn.graph_handler import GraphHandler
|
||||
from basic_cnn.model import Model, get_multi_gpu_models
|
||||
from basic_cnn.trainer import Trainer, MultiGPUTrainer
|
||||
|
||||
from basic_cnn.read_data import read_data, get_cnn_data_filter, update_config
|
||||
|
||||
|
||||
def main(config):
|
||||
set_dirs(config)
|
||||
with tf.device(config.device):
|
||||
if config.mode == 'train':
|
||||
_train(config)
|
||||
elif config.mode == 'test' or config.mode == 'dev':
|
||||
_test(config)
|
||||
elif config.mode == 'forward':
|
||||
_forward(config)
|
||||
else:
|
||||
raise ValueError("invalid value for 'mode': {}".format(config.mode))
|
||||
|
||||
|
||||
def _config_draft(config):
|
||||
if config.draft:
|
||||
config.num_steps = 2
|
||||
config.eval_period = 1
|
||||
config.log_period = 1
|
||||
config.save_period = 1
|
||||
config.eval_num_batches = 1
|
||||
|
||||
|
||||
def _train(config):
|
||||
# load_metadata(config, 'train') # this updates the config file according to metadata file
|
||||
|
||||
data_filter = get_cnn_data_filter(config)
|
||||
train_data = read_data(config, 'train', config.load, data_filter=data_filter)
|
||||
dev_data = read_data(config, 'dev', True, data_filter=data_filter)
|
||||
# test_data = read_data(config, 'test', True, data_filter=data_filter)
|
||||
update_config(config, [train_data, dev_data])
|
||||
|
||||
_config_draft(config)
|
||||
|
||||
word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
|
||||
word2idx_dict = train_data.shared['word2idx']
|
||||
idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
|
||||
print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
|
||||
emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
|
||||
else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
|
||||
for idx in range(config.word_vocab_size)])
|
||||
config.emb_mat = emb_mat
|
||||
|
||||
# construct model graph and variables (using default graph)
|
||||
pprint(config.__flags, indent=2)
|
||||
# model = Model(config)
|
||||
models = get_multi_gpu_models(config)
|
||||
model = models[0]
|
||||
trainer = MultiGPUTrainer(config, models)
|
||||
evaluator = MultiGPUCNNAccuracyEvaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
|
||||
graph_handler = GraphHandler(config)  # controls all tensors and variables in the graph, including loading/saving
|
||||
|
||||
# Variables
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
# begin training
|
||||
print(train_data.num_examples)
|
||||
num_steps = config.num_steps or int(math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))) * config.num_epochs
|
||||
global_step = 0
|
||||
for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
|
||||
num_steps=num_steps, shuffle=True, cluster=config.cluster), total=num_steps):
|
||||
global_step = sess.run(model.global_step) + 1 # +1 because all calculations are done after step
|
||||
get_summary = global_step % config.log_period == 0
|
||||
loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
|
||||
if get_summary:
|
||||
graph_handler.add_summary(summary, global_step)
|
||||
|
||||
# occasional saving
|
||||
if global_step % config.save_period == 0:
|
||||
graph_handler.save(sess, global_step=global_step)
|
||||
|
||||
if not config.eval:
|
||||
continue
|
||||
# Occasional evaluation
|
||||
if global_step % config.eval_period == 0:
|
||||
num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
|
||||
if 0 < config.eval_num_batches < num_steps:
|
||||
num_steps = config.eval_num_batches
|
||||
e_train = evaluator.get_evaluation_from_batches(
|
||||
sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps)
|
||||
)
|
||||
graph_handler.add_summaries(e_train.summaries, global_step)
|
||||
e_dev = evaluator.get_evaluation_from_batches(
|
||||
sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
|
||||
graph_handler.add_summaries(e_dev.summaries, global_step)
|
||||
|
||||
if config.dump_eval:
|
||||
graph_handler.dump_eval(e_dev)
|
||||
if config.dump_answer:
|
||||
graph_handler.dump_answer(e_dev)
|
||||
if global_step % config.save_period != 0:
|
||||
graph_handler.save(sess, global_step=global_step)
|
||||
|
||||
|
||||
def _test(config):
|
||||
assert config.load
|
||||
test_data = read_data(config, config.mode, True)
|
||||
update_config(config, [test_data])
|
||||
|
||||
_config_draft(config)
|
||||
|
||||
if config.use_glove_for_unk:
|
||||
word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
|
||||
new_word2idx_dict = test_data.shared['new_word2idx']
|
||||
idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
|
||||
# print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
|
||||
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
|
||||
config.new_emb_mat = new_emb_mat
|
||||
|
||||
pprint(config.__flags, indent=2)
|
||||
models = get_multi_gpu_models(config)
|
||||
evaluator = MultiGPUCNNAccuracyEvaluator(config, models, tensor_dict=models[0].tensor_dict if config.vis else None)
|
||||
graph_handler = GraphHandler(config)  # controls all tensors and variables in the graph, including loading/saving
|
||||
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
graph_handler.initialize(sess)
|
||||
num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
|
||||
if 0 < config.eval_num_batches < num_steps:
|
||||
num_steps = config.eval_num_batches
|
||||
|
||||
e = None
|
||||
for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps, cluster=config.cluster), total=num_steps):
|
||||
ei = evaluator.get_evaluation(sess, multi_batch)
|
||||
e = ei if e is None else e + ei
|
||||
if config.vis:
|
||||
eval_subdir = os.path.join(config.eval_dir, "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
|
||||
if not os.path.exists(eval_subdir):
|
||||
os.mkdir(eval_subdir)
|
||||
path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
|
||||
graph_handler.dump_eval(ei, path=path)
|
||||
|
||||
print(e)
|
||||
if config.dump_answer:
|
||||
print("dumping answer ...")
|
||||
graph_handler.dump_answer(e)
|
||||
if config.dump_eval:
|
||||
print("dumping eval ...")
|
||||
graph_handler.dump_eval(e)
|
||||
|
||||
|
||||
def _forward(config):
|
||||
assert config.load
|
||||
test_data = read_data(config, config.forward_name, True)
|
||||
update_config(config, [test_data])
|
||||
|
||||
_config_draft(config)
|
||||
|
||||
if config.use_glove_for_unk:
|
||||
word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
|
||||
new_word2idx_dict = test_data.shared['new_word2idx']
|
||||
idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
|
||||
# print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
|
||||
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
|
||||
config.new_emb_mat = new_emb_mat
|
||||
|
||||
pprint(config.__flags, indent=2)
|
||||
models = get_multi_gpu_models(config)
|
||||
model = models[0]
|
||||
evaluator = ForwardEvaluator(config, model)
|
||||
graph_handler = GraphHandler(config)  # controls all tensors and variables in the graph, including loading/saving
|
||||
|
||||
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
num_batches = math.ceil(test_data.num_examples / config.batch_size)
|
||||
if 0 < config.eval_num_batches < num_batches:
|
||||
num_batches = config.eval_num_batches
|
||||
e = evaluator.get_evaluation_from_batches(sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
|
||||
print(e)
|
||||
if config.dump_answer:
|
||||
print("dumping answer ...")
|
||||
graph_handler.dump_answer(e, path=config.answer_path)
|
||||
if config.dump_eval:
|
||||
print("dumping eval ...")
|
||||
graph_handler.dump_eval(e)
|
||||
|
||||
|
||||
def set_dirs(config):
|
||||
# create directories
|
||||
if not config.load and os.path.exists(config.out_dir):
|
||||
shutil.rmtree(config.out_dir)
|
||||
|
||||
config.save_dir = os.path.join(config.out_dir, "save")
|
||||
config.log_dir = os.path.join(config.out_dir, "log")
|
||||
config.eval_dir = os.path.join(config.out_dir, "eval")
|
||||
config.answer_dir = os.path.join(config.out_dir, "answer")
|
||||
if not os.path.exists(config.out_dir):
|
||||
os.makedirs(config.out_dir)
|
||||
if not os.path.exists(config.save_dir):
|
||||
os.mkdir(config.save_dir)
|
||||
if not os.path.exists(config.log_dir):
|
||||
os.mkdir(config.log_dir)
|
||||
if not os.path.exists(config.answer_dir):
|
||||
os.mkdir(config.answer_dir)
|
||||
if not os.path.exists(config.eval_dir):
|
||||
os.mkdir(config.eval_dir)
|
||||
|
||||
|
||||
def _get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("config_path")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
class Config(object):
|
||||
def __init__(self, **entries):
|
||||
self.__dict__.update(entries)
|
||||
|
||||
|
||||
def _run():
|
||||
args = _get_args()
|
||||
with open(args.config_path, 'r') as fh:
|
||||
config = Config(**json.load(fh))
|
||||
main(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_run()
|
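_train() above sizes its loop as ceil(num_examples / (batch_size * num_gpus)) * num_epochs whenever config.num_steps is not set. A small worked example with made-up dataset sizes:

import math

num_examples, batch_size, num_gpus, num_epochs = 387000, 60, 1, 50   # made-up sizes
num_steps = int(math.ceil(num_examples / (batch_size * num_gpus))) * num_epochs
print(num_steps)  # 322500 multi-GPU batches in total
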
375
tensorflow/SQuAD/basic_cnn/model.py
Normal file
|
@@ -0,0 +1,375 @@
|
|||
import random
|
||||
|
||||
import itertools
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.ops.rnn_cell import BasicLSTMCell, GRUCell
|
||||
|
||||
from basic_cnn.read_data import DataSet
|
||||
from basic_cnn.superhighway import SHCell
|
||||
from my.tensorflow import exp_mask, get_initializer, VERY_SMALL_NUMBER
|
||||
from my.tensorflow.nn import linear, double_linear_logits, linear_logits, softsel, dropout, get_logits, softmax, \
|
||||
highway_network, multi_conv1d
|
||||
from my.tensorflow.rnn import bidirectional_dynamic_rnn, dynamic_rnn
|
||||
from my.tensorflow.rnn_cell import SwitchableDropoutWrapper, AttentionCell
|
||||
|
||||
|
||||
def bi_attention(config, is_train, h, u, h_mask=None, u_mask=None, scope=None, tensor_dict=None):
|
||||
"""
|
||||
h_a:
|
||||
all u attending on h
|
||||
choosing an element of h that max-matches u
|
||||
First creates confusion matrix between h and u
|
||||
Then take max of the attention weights over u row
|
||||
Finally softmax over the context (h) positions
|
||||
|
||||
u_a:
|
||||
each h attending on u
|
||||
|
||||
:param h: [N, M, JX, d]
|
||||
:param u: [N, JQ, d]
|
||||
:param h_mask: [N, M, JX]
|
||||
:param u_mask: [N, JQ]
|
||||
:param scope:
|
||||
:return: u_a [N, M, JX, d], h_a [N, M, JX, d] (h_a is None when config.bi is False)
|
||||
"""
|
||||
with tf.variable_scope(scope or "bi_attention"):
|
||||
N, M, JX, JQ, d = config.batch_size, config.max_num_sents, config.max_sent_size, config.max_ques_size, config.hidden_size
|
||||
JX = tf.shape(h)[2]
|
||||
h_aug = tf.tile(tf.expand_dims(h, 3), [1, 1, 1, JQ, 1])
|
||||
u_aug = tf.tile(tf.expand_dims(tf.expand_dims(u, 1), 1), [1, M, JX, 1, 1])
|
||||
if h_mask is None:
|
||||
and_mask = None
|
||||
else:
|
||||
h_mask_aug = tf.tile(tf.expand_dims(h_mask, 3), [1, 1, 1, JQ])
|
||||
u_mask_aug = tf.tile(tf.expand_dims(tf.expand_dims(u_mask, 1), 1), [1, M, JX, 1])
|
||||
and_mask = h_mask_aug & u_mask_aug
|
||||
|
||||
u_logits = get_logits([h_aug, u_aug], None, True, wd=config.wd, mask=and_mask,
|
||||
is_train=is_train, func=config.logit_func, scope='u_logits') # [N, M, JX, JQ]
|
||||
u_a = softsel(u_aug, u_logits) # [N, M, JX, d]
|
||||
if tensor_dict is not None:
|
||||
# a_h = tf.nn.softmax(h_logits) # [N, M, JX]
|
||||
a_u = tf.nn.softmax(u_logits) # [N, M, JX, JQ]
|
||||
# tensor_dict['a_h'] = a_h
|
||||
tensor_dict['a_u'] = a_u
|
||||
if config.bi:
|
||||
h_a = softsel(h, tf.reduce_max(u_logits, 3)) # [N, M, d]
|
||||
h_a = tf.tile(tf.expand_dims(h_a, 2), [1, 1, JX, 1])
|
||||
else:
|
||||
h_a = None
|
||||
return u_a, h_a
|
||||
|
||||
|
||||
def attention_layer(config, is_train, h, u, h_mask=None, u_mask=None, scope=None, tensor_dict=None):
|
||||
with tf.variable_scope(scope or "attention_layer"):
|
||||
u_a, h_a = bi_attention(config, is_train, h, u, h_mask=h_mask, u_mask=u_mask, tensor_dict=tensor_dict)
|
||||
if config.bi:
|
||||
p0 = tf.concat(axis=3, values=[h , u_a, h * u_a, h * h_a])
|
||||
else:
|
||||
p0 = tf.concat(axis=3, values=[h , u_a, h * u_a])
|
||||
return p0
|
||||
|
||||
|
||||
class Model(object):
|
||||
def __init__(self, config, scope):
|
||||
self.scope = scope
|
||||
self.config = config
|
||||
self.global_step = tf.get_variable('global_step', shape=[], dtype='int32',
|
||||
initializer=tf.constant_initializer(0), trainable=False)
|
||||
|
||||
# Define forward inputs here
|
||||
N, M, JX, JQ, VW, VC, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.max_word_size
|
||||
self.x = tf.placeholder('int32', [N, M, None], name='x')
|
||||
self.cx = tf.placeholder('int32', [N, M, None, W], name='cx')
|
||||
self.x_mask = tf.placeholder('bool', [N, M, None], name='x_mask')
|
||||
self.q = tf.placeholder('int32', [N, JQ], name='q')
|
||||
self.cq = tf.placeholder('int32', [N, JQ, W], name='cq')
|
||||
self.q_mask = tf.placeholder('bool', [N, JQ], name='q_mask')
|
||||
self.y = tf.placeholder('bool', [N, M, JX], name='y')
|
||||
self.is_train = tf.placeholder('bool', [], name='is_train')
|
||||
self.new_emb_mat = tf.placeholder('float', [None, config.word_emb_size], name='new_emb_mat')
|
||||
|
||||
# Define misc
|
||||
self.tensor_dict = {}
|
||||
|
||||
# Forward outputs / loss inputs
|
||||
self.logits = None
|
||||
self.yp = None
|
||||
self.var_list = None
|
||||
|
||||
# Loss outputs
|
||||
self.loss = None
|
||||
|
||||
self._build_forward()
|
||||
self._build_loss()
|
||||
if config.mode == 'train':
|
||||
self._build_ema()
|
||||
|
||||
self.summary = tf.summary.merge_all()
|
||||
self.summary = tf.summary.merge(tf.get_collection("summaries", scope=self.scope))
|
||||
|
||||
def _build_forward(self):
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC, d, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
|
||||
config.max_word_size
|
||||
JX = tf.shape(self.x)[2]
|
||||
dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size
|
||||
|
||||
with tf.variable_scope("emb"):
|
||||
with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
|
||||
char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')
|
||||
|
||||
with tf.variable_scope("char"):
|
||||
Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx) # [N, M, JX, W, dc]
|
||||
Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq) # [N, JQ, W, dc]
|
||||
Acx = tf.reshape(Acx, [-1, JX, W, dc])
|
||||
Acq = tf.reshape(Acq, [-1, JQ, W, dc])
|
||||
|
||||
filter_sizes = list(map(int, config.out_channel_dims.split(',')))
|
||||
heights = list(map(int, config.filter_heights.split(',')))
|
||||
assert sum(filter_sizes) == dco
|
||||
with tf.variable_scope("conv"):
|
||||
xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
|
||||
if config.share_cnn_weights:
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
|
||||
else:
|
||||
qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
|
||||
xx = tf.reshape(xx, [-1, M, JX, dco])
|
||||
qq = tf.reshape(qq, [-1, JQ, dco])
|
||||
|
||||
if config.use_word_emb:
|
||||
with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
|
||||
if config.mode == 'train':
|
||||
word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
|
||||
else:
|
||||
word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
|
||||
if config.use_glove_for_unk:
|
||||
word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])
|
||||
|
||||
with tf.name_scope("word"):
|
||||
Ax = tf.nn.embedding_lookup(word_emb_mat, self.x) # [N, M, JX, d]
|
||||
Aq = tf.nn.embedding_lookup(word_emb_mat, self.q) # [N, JQ, d]
|
||||
self.tensor_dict['x'] = Ax
|
||||
self.tensor_dict['q'] = Aq
|
||||
xx = tf.concat(axis=3, values=[xx, Ax]) # [N, M, JX, di]
|
||||
qq = tf.concat(axis=2, values=[qq, Aq]) # [N, JQ, di]
|
||||
|
||||
# highway network
|
||||
with tf.variable_scope("highway"):
|
||||
xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
|
||||
self.tensor_dict['xx'] = xx
|
||||
self.tensor_dict['qq'] = qq
|
||||
|
||||
cell = BasicLSTMCell(d, state_is_tuple=True)
|
||||
d_cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2) # [N, M]
|
||||
q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1) # [N]
|
||||
|
||||
with tf.variable_scope("prepro"):
|
||||
(fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell, d_cell, qq, q_len, dtype='float', scope='u1') # [N, J, d], [N, d]
|
||||
u = tf.concat(axis=2, values=[fw_u, bw_u])
|
||||
if config.two_prepro_layers:
|
||||
(fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell, d_cell, u, q_len, dtype='float', scope='u2') # [N, J, d], [N, d]
|
||||
u = tf.concat(axis=2, values=[fw_u, bw_u])
|
||||
if config.share_lstm_weights:
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='u1') # [N, M, JX, 2d]
|
||||
h = tf.concat(axis=3, values=[fw_h, bw_h]) # [N, M, JX, 2d]
|
||||
if config.two_prepro_layers:
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, h, x_len, dtype='float', scope='u2') # [N, M, JX, 2d]
|
||||
h = tf.concat(axis=3, values=[fw_h, bw_h]) # [N, M, JX, 2d]
|
||||
|
||||
else:
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='h1') # [N, M, JX, 2d]
|
||||
h = tf.concat(axis=3, values=[fw_h, bw_h]) # [N, M, JX, 2d]
|
||||
if config.two_prepro_layers:
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, h, x_len, dtype='float', scope='h2') # [N, M, JX, 2d]
|
||||
h = tf.concat(axis=3, values=[fw_h, bw_h]) # [N, M, JX, 2d]
|
||||
self.tensor_dict['u'] = u
|
||||
self.tensor_dict['h'] = h
|
||||
|
||||
with tf.variable_scope("main"):
|
||||
p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
|
||||
(fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(d_cell, d_cell, p0, x_len, dtype='float', scope='g0') # [N, M, JX, 2d]
|
||||
g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
|
||||
# p1 = attention_layer(config, self.is_train, g0, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p1")
|
||||
(fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(d_cell, d_cell, g0, x_len, dtype='float', scope='g1') # [N, M, JX, 2d]
|
||||
g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])
|
||||
# logits = u_logits(config, self.is_train, g1, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="logits")
|
||||
# [N, M, JX]
|
||||
logits = get_logits([g1, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob, mask=self.x_mask, is_train=self.is_train, func=config.answer_func, scope='logits1')
|
||||
a1i = softsel(tf.reshape(g1, [N, M*JX, 2*d]), tf.reshape(logits, [N, M*JX]))
|
||||
|
||||
if config.feed_gt:
|
||||
logy = tf.log(tf.cast(self.y, 'float') + VERY_SMALL_NUMBER)
|
||||
logits = tf.cond(self.is_train, lambda: logy, lambda: logits)
|
||||
if config.feed_hard:
|
||||
hard_yp = tf.argmax(tf.reshape(logits, [N, M*JX]), 1)
|
||||
hard_logits = tf.reshape(tf.one_hot(hard_yp, M*JX), [N, M, JX]) # [N, M, JX]
|
||||
logits = tf.cond(self.is_train, lambda: logits, lambda: hard_logits)
|
||||
|
||||
flat_logits = tf.reshape(logits, [-1, M * JX])
|
||||
flat_yp = tf.nn.softmax(flat_logits) # [-1, M*JX]
|
||||
yp = tf.reshape(flat_yp, [-1, M, JX])
|
||||
|
||||
self.tensor_dict['g1'] = g1
|
||||
|
||||
self.logits = flat_logits
|
||||
self.yp = yp
|
||||
|
||||
def _build_loss(self):
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size
|
||||
JX = tf.shape(self.x)[2]
|
||||
loss_mask = tf.reduce_max(tf.cast(self.q_mask, 'float'), 1)
|
||||
losses = -tf.log(tf.reduce_sum(self.yp * tf.cast(self.y, 'float'), [1, 2]) + VERY_SMALL_NUMBER)
|
||||
ce_loss = tf.reduce_mean(loss_mask * losses)
|
||||
tf.add_to_collection('losses', ce_loss)
|
||||
|
||||
self.loss = tf.add_n(tf.get_collection('losses', scope=self.scope), name='loss')
|
||||
tf.summary.scalar(self.loss.op.name, self.loss)
|
||||
tf.add_to_collection('ema/scalar', self.loss)
|
||||
|
||||
def _build_ema(self):
|
||||
ema = tf.train.ExponentialMovingAverage(self.config.decay)
|
||||
ema_op = ema.apply(tf.get_collection("ema/scalar", scope=self.scope) + tf.get_collection("ema/histogram", scope=self.scope))
|
||||
for var in tf.get_collection("ema/scalar", scope=self.scope):
|
||||
ema_var = ema.average(var)
|
||||
tf.summary.scalar(ema_var.op.name, ema_var)
|
||||
for var in tf.get_collection("ema/histogram", scope=self.scope):
|
||||
ema_var = ema.average(var)
|
||||
tf.summary.histogram(ema_var.op.name, ema_var)
|
||||
|
||||
with tf.control_dependencies([ema_op]):
|
||||
self.loss = tf.identity(self.loss)
|
||||
|
||||
def get_loss(self):
|
||||
return self.loss
|
||||
|
||||
def get_global_step(self):
|
||||
return self.global_step
|
||||
|
||||
def get_var_list(self):
|
||||
return self.var_list
|
||||
|
||||
def get_feed_dict(self, batch, is_train, supervised=True):
|
||||
assert isinstance(batch, DataSet)
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC, d, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, config.max_word_size
|
||||
feed_dict = {}
|
||||
|
||||
if config.len_opt:
|
||||
"""
|
||||
Note that this optimization results in variable GPU RAM usage (i.e. it can cause OOM in the middle of training).
|
||||
First train without len_opt to make sure there is no OOM, then enable len_opt.
|
||||
"""
|
||||
if sum(len(para) for para in batch.data['x']) == 0:
|
||||
new_JX = 1
|
||||
else:
|
||||
new_JX = max(len(para) for para in batch.data['x'])
|
||||
JX = min(JX, new_JX)
|
||||
# print(JX)
|
||||
|
||||
x = np.zeros([N, M, JX], dtype='int32')
|
||||
cx = np.zeros([N, M, JX, W], dtype='int32')
|
||||
x_mask = np.zeros([N, M, JX], dtype='bool')
|
||||
q = np.zeros([N, JQ], dtype='int32')
|
||||
cq = np.zeros([N, JQ, W], dtype='int32')
|
||||
q_mask = np.zeros([N, JQ], dtype='bool')
|
||||
|
||||
feed_dict[self.x] = x
|
||||
feed_dict[self.x_mask] = x_mask
|
||||
feed_dict[self.cx] = cx
|
||||
feed_dict[self.q] = q
|
||||
feed_dict[self.cq] = cq
|
||||
feed_dict[self.q_mask] = q_mask
|
||||
feed_dict[self.is_train] = is_train
|
||||
if config.use_glove_for_unk:
|
||||
feed_dict[self.new_emb_mat] = batch.shared['new_emb_mat']
|
||||
|
||||
X = batch.data['x']
|
||||
CX = batch.data['cx']
|
||||
|
||||
def _get_word(word):
|
||||
if word.startswith("@"):
|
||||
return 2
|
||||
d = batch.shared['word2idx']
|
||||
for each in (word, word.lower(), word.capitalize(), word.upper()):
|
||||
if each in d:
|
||||
return d[each]
|
||||
if config.use_glove_for_unk:
|
||||
d2 = batch.shared['new_word2idx']
|
||||
for each in (word, word.lower(), word.capitalize(), word.upper()):
|
||||
if each in d2:
|
||||
return d2[each] + len(d)
|
||||
return 1
|
||||
|
||||
def _get_char(char):
|
||||
d = batch.shared['char2idx']
|
||||
if char in d:
|
||||
return d[char]
|
||||
return 1
|
||||
|
||||
if supervised:
|
||||
y = np.zeros([N, M, JX], dtype='int32')
|
||||
feed_dict[self.y] = y
|
||||
|
||||
for i, (xi, yi) in enumerate(zip(batch.data['x'], batch.data['y'])):
|
||||
count = 0
|
||||
for j, xij in enumerate(xi):
|
||||
for k, xijk in enumerate(xij):
|
||||
if xijk == yi:
|
||||
y[i, j, k] = True
|
||||
count += 1
|
||||
assert count > 0
|
||||
|
||||
for i, xi in enumerate(X):
|
||||
for j, xij in enumerate(xi):
|
||||
for k, xijk in enumerate(xij):
|
||||
each = _get_word(xijk)
|
||||
x[i, j, k] = each
|
||||
x_mask[i, j, k] = True
|
||||
|
||||
for i, cxi in enumerate(CX):
|
||||
for j, cxij in enumerate(cxi):
|
||||
for k, cxijk in enumerate(cxij):
|
||||
for l, cxijkl in enumerate(cxijk):
|
||||
cx[i, j, k, l] = _get_char(cxijkl)
|
||||
if l + 1 == config.max_word_size:
|
||||
break
|
||||
|
||||
for i, qi in enumerate(batch.data['q']):
|
||||
for j, qij in enumerate(qi):
|
||||
q[i, j] = _get_word(qij)
|
||||
q_mask[i, j] = True
|
||||
|
||||
for i, cqi in enumerate(batch.data['cq']):
|
||||
for j, cqij in enumerate(cqi):
|
||||
for k, cqijk in enumerate(cqij):
|
||||
cq[i, j, k] = _get_char(cqijk)
|
||||
if k + 1 == config.max_word_size:
|
||||
break
|
||||
|
||||
return feed_dict
|
||||
|
||||
|
||||
def get_multi_gpu_models(config):
|
||||
models = []
|
||||
for gpu_idx in range(config.num_gpus):
|
||||
with tf.name_scope("model_{}".format(gpu_idx)) as scope, tf.device("/gpu:{}".format(gpu_idx)):
|
||||
model = Model(config, scope)
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
models.append(model)
|
||||
return models
|
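# A minimal multi-GPU training-loop sketch (illustrative only): get_multi_gpu_models pairs with
# MultiGPUTrainer from basic_cnn/trainer.py and DataSet.get_multi_batches from basic_cnn/read_data.py.
# `config`, `sess`, `train_data`, and `config.num_steps` are hypothetical names, not part of this file.
models = get_multi_gpu_models(config)
trainer = MultiGPUTrainer(config, models)
sess.run(tf.global_variables_initializer())
for batches in train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                            num_steps=config.num_steps, shuffle=True):
    loss, _, _ = trainer.step(sess, batches, get_summary=False)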
294
tensorflow/SQuAD/basic_cnn/read_data.py
Normal file
|
@ -0,0 +1,294 @@
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import itertools
|
||||
import math
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cnn_dm.prepro import para2sents
|
||||
from my.tensorflow import grouper
|
||||
from my.utils import index
|
||||
|
||||
|
||||
class Data(object):
|
||||
def get_size(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_by_idxs(self, idxs):
|
||||
"""
|
||||
Efficient way to obtain a batch of items from the filesystem.
|
||||
:param idxs: indices of the items to fetch
|
||||
:return dict: each key maps to a list of values, e.g. {'x': [...], 'y': [...]}
|
||||
"""
|
||||
data = defaultdict(list)
|
||||
for idx in idxs:
|
||||
each_data = self.get_one(idx)
|
||||
for key, val in each_data.items():
|
||||
data[key].append(val)
|
||||
return data
|
||||
|
||||
def get_one(self, idx):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_empty(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def __add__(self, other):
|
||||
raise NotImplementedError()
|
||||
|
||||
class MyData(Data):
|
||||
def __init__(self, config, root_dir, file_names):
|
||||
self.root_dir = root_dir
|
||||
self.file_names = file_names
|
||||
self.config = config
|
||||
|
||||
def get_one(self, idx):
|
||||
file_name = self.file_names[idx]
|
||||
with open(os.path.join(self.root_dir, file_name), 'r') as fh:
|
||||
url = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
para = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
ques = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
answer = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
cands = list(line.strip() for line in fh)
|
||||
cand_ents = list(cand.split(":")[0] for cand in cands)
|
||||
wordss = para2sents(para, self.config.width)
|
||||
ques_words = ques.split(" ")
|
||||
|
||||
x = wordss
|
||||
cx = [[list(word) for word in words] for words in wordss]
|
||||
q = ques_words
|
||||
cq = [list(word) for word in ques_words]
|
||||
y = answer
|
||||
c = cand_ents
|
||||
|
||||
data = {'x': x, 'cx': cx, 'q': q, 'cq': cq, 'y': y, 'c': c, 'ids': file_name}
|
||||
return data
|
||||
|
||||
def get_empty(self):
|
||||
return MyData(self.config, self.root_dir, [])
|
||||
|
||||
def __add__(self, other):
|
||||
file_names = self.file_names + other.file_names
|
||||
return MyData(self.config, self.root_dir, file_names)
|
||||
|
||||
def get_size(self):
|
||||
return len(self.file_names)
|
||||
|
||||
|
||||
class DataSet(object):
|
||||
def __init__(self, data, data_type, shared=None, valid_idxs=None):
|
||||
self.data = data # e.g. {'X': [0, 1, 2], 'Y': [2, 3, 4]}
|
||||
self.data_type = data_type
|
||||
self.shared = shared
|
||||
total_num_examples = self.get_data_size()
|
||||
self.valid_idxs = range(total_num_examples) if valid_idxs is None else valid_idxs
|
||||
self.num_examples = total_num_examples
|
||||
|
||||
def _sort_key(self, idx):
|
||||
rx = self.data['*x'][idx]
|
||||
x = self.shared['x'][rx[0]][rx[1]]
|
||||
return max(map(len, x))
|
||||
|
||||
def get_data_size(self):
|
||||
if isinstance(self.data, dict):
|
||||
return len(next(iter(self.data.values())))
|
||||
elif isinstance(self.data, Data):
|
||||
return self.data.get_size()
|
||||
raise Exception()
|
||||
|
||||
def get_by_idxs(self, idxs):
|
||||
if isinstance(self.data, dict):
|
||||
out = defaultdict(list)
|
||||
for key, val in self.data.items():
|
||||
out[key].extend(val[idx] for idx in idxs)
|
||||
return out
|
||||
elif isinstance(self.data, Data):
|
||||
return self.data.get_by_idxs(idxs)
|
||||
raise Exception()
|
||||
|
||||
def get_one(self, idx):
|
||||
if isinstance(self.data, dict):
|
||||
out = {key: [val[idx]] for key, val in self.data.items()}
|
||||
return out
|
||||
elif isinstance(self.data, Data):
|
||||
return self.data.get_one(idx)
|
||||
|
||||
def get_batches(self, batch_size, num_batches=None, shuffle=False, cluster=False):
|
||||
"""
|
||||
|
||||
:param batch_size:
|
||||
:param num_batches:
|
||||
:param shuffle:
|
||||
:param cluster: cluster examples by their lengths; this might give a performance boost (i.e. faster training).
|
||||
:return:
|
||||
"""
|
||||
num_batches_per_epoch = int(math.ceil(self.num_examples / batch_size))
|
||||
if num_batches is None:
|
||||
num_batches = num_batches_per_epoch
|
||||
num_epochs = int(math.ceil(num_batches / num_batches_per_epoch))
|
||||
|
||||
if shuffle:
|
||||
random_idxs = random.sample(self.valid_idxs, len(self.valid_idxs))
|
||||
if cluster:
|
||||
sorted_idxs = sorted(random_idxs, key=self._sort_key)
|
||||
sorted_grouped = lambda: list(grouper(sorted_idxs, batch_size))
|
||||
grouped = lambda: random.sample(sorted_grouped(), num_batches_per_epoch)
|
||||
else:
|
||||
random_grouped = lambda: list(grouper(random_idxs, batch_size))
|
||||
grouped = random_grouped
|
||||
else:
|
||||
raw_grouped = lambda: list(grouper(self.valid_idxs, batch_size))
|
||||
grouped = raw_grouped
|
||||
|
||||
batch_idx_tuples = itertools.chain.from_iterable(grouped() for _ in range(num_epochs))
|
||||
for _ in range(num_batches):
|
||||
batch_idxs = tuple(i for i in next(batch_idx_tuples) if i is not None)
|
||||
batch_data = self.get_by_idxs(batch_idxs)
|
||||
shared_batch_data = {}
|
||||
for key, val in batch_data.items():
|
||||
if key.startswith('*'):
|
||||
assert self.shared is not None
|
||||
shared_key = key[1:]
|
||||
shared_batch_data[shared_key] = [index(self.shared[shared_key], each) for each in val]
|
||||
batch_data.update(shared_batch_data)
|
||||
|
||||
batch_ds = DataSet(batch_data, self.data_type, shared=self.shared)
|
||||
yield batch_idxs, batch_ds
|
||||
|
||||
def get_multi_batches(self, batch_size, num_batches_per_step, num_steps=None, shuffle=False, cluster=False):
|
||||
batch_size_per_step = batch_size * num_batches_per_step
|
||||
batches = self.get_batches(batch_size_per_step, num_batches=num_steps, shuffle=shuffle, cluster=cluster)
|
||||
multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=num_batches_per_step),
|
||||
data_set.divide(num_batches_per_step))) for idxs, data_set in batches)
|
||||
return multi_batches
|
||||
|
||||
def get_empty(self):
|
||||
if isinstance(self.data, dict):
|
||||
data = {key: [] for key in self.data}
|
||||
elif isinstance(self.data, Data):
|
||||
data = self.data.get_empty()
|
||||
else:
|
||||
raise Exception()
|
||||
return DataSet(data, self.data_type, shared=self.shared)
|
||||
|
||||
def __add__(self, other):
|
||||
if isinstance(self.data, dict):
|
||||
data = {key: val + other.data[key] for key, val in self.data.items()}
|
||||
elif isinstance(self.data, Data):
|
||||
data = self.data + other.data
|
||||
else:
|
||||
raise Exception()
|
||||
|
||||
valid_idxs = list(self.valid_idxs) + [valid_idx + self.num_examples for valid_idx in other.valid_idxs]
|
||||
return DataSet(data, self.data_type, shared=self.shared, valid_idxs=valid_idxs)
|
||||
|
||||
def divide(self, integer):
|
||||
batch_size = int(math.ceil(self.num_examples / integer))
|
||||
idxs_gen = grouper(self.valid_idxs, batch_size, shorten=True, num_groups=integer)
|
||||
data_gen = (self.get_by_idxs(idxs) for idxs in idxs_gen)
|
||||
ds_tuple = tuple(DataSet(data, self.data_type, shared=self.shared) for data in data_gen)
|
||||
return ds_tuple
|
||||
|
||||
|
||||
class MyDataSet(DataSet):
|
||||
def __init__(self, data, data_type, shared=None, valid_idxs=None):
|
||||
super(MyDataSet, self).__init__(data, data_type, shared=shared, valid_idxs=valid_idxs)
|
||||
shared['max_num_sents'] = len(self.get_one(self.num_examples-1)['x'])
|
||||
|
||||
def _sort_key(self, idx):
|
||||
return idx
|
||||
|
||||
|
||||
def read_data(config, data_type, ref, data_filter=None):
|
||||
shared_path = os.path.join(config.data_dir, "shared_{}.json".format(data_type))
|
||||
with open(shared_path, 'r') as fh:
|
||||
shared = json.load(fh)
|
||||
|
||||
paths = shared['sorted']
|
||||
if config.filter_ratio < 1.0:
|
||||
stop = int(round(len(paths) * config.filter_ratio))
|
||||
paths = paths[:stop]
|
||||
num_examples = len(paths)
|
||||
valid_idxs = range(num_examples)
|
||||
|
||||
print("Loaded {}/{} examples from {}".format(len(valid_idxs), num_examples, data_type))
|
||||
|
||||
shared_path = config.shared_path or os.path.join(config.out_dir, "shared.json")
|
||||
if not ref:
|
||||
word2vec_dict = shared['lower_word2vec'] if config.lower_word else shared['word2vec']
|
||||
word_counter = shared['lower_word_counter'] if config.lower_word else shared['word_counter']
|
||||
char_counter = shared['char_counter']
|
||||
if config.finetune:
|
||||
shared['word2idx'] = {word: idx + 3 for idx, word in
|
||||
enumerate(word for word, count in word_counter.items()
|
||||
if count > config.word_count_th or (config.known_if_glove and word in word2vec_dict))}
|
||||
else:
|
||||
assert config.known_if_glove
|
||||
assert config.use_glove_for_unk
|
||||
shared['word2idx'] = {word: idx + 3 for idx, word in
|
||||
enumerate(word for word, count in word_counter.items()
|
||||
if count > config.word_count_th and word not in word2vec_dict)}
|
||||
shared['char2idx'] = {char: idx + 2 for idx, char in
|
||||
enumerate(char for char, count in char_counter.items()
|
||||
if count > config.char_count_th)}
|
||||
NULL = "-NULL-"
|
||||
UNK = "-UNK-"
|
||||
ENT = "-ENT-"
|
||||
shared['word2idx'][NULL] = 0
|
||||
shared['word2idx'][UNK] = 1
|
||||
shared['word2idx'][ENT] = 2
|
||||
shared['char2idx'][NULL] = 0
|
||||
shared['char2idx'][UNK] = 1
|
||||
|
||||
json.dump({'word2idx': shared['word2idx'], 'char2idx': shared['char2idx']}, open(shared_path, 'w'))
|
||||
else:
|
||||
new_shared = json.load(open(shared_path, 'r'))
|
||||
for key, val in new_shared.items():
|
||||
shared[key] = val
|
||||
|
||||
if config.use_glove_for_unk:
|
||||
# create new word2idx and word2vec
|
||||
word2vec_dict = shared['lower_word2vec'] if config.lower_word else shared['word2vec']
|
||||
new_word2idx_dict = {word: idx for idx, word in enumerate(word for word in word2vec_dict.keys() if word not in shared['word2idx'])}
|
||||
shared['new_word2idx'] = new_word2idx_dict
|
||||
offset = len(shared['word2idx'])
|
||||
word2vec_dict = shared['lower_word2vec'] if config.lower_word else shared['word2vec']
|
||||
new_word2idx_dict = shared['new_word2idx']
|
||||
idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
|
||||
# print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
|
||||
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
|
||||
shared['new_emb_mat'] = new_emb_mat
|
||||
|
||||
data = MyData(config, os.path.join(config.root_dir, data_type), paths)
|
||||
data_set = MyDataSet(data, data_type, shared=shared, valid_idxs=valid_idxs)
|
||||
return data_set
|
||||
|
||||
|
||||
def get_cnn_data_filter(config):
|
||||
return True
|
||||
|
||||
|
||||
def update_config(config, data_sets):
|
||||
config.max_num_sents = 0
|
||||
config.max_sent_size = 0
|
||||
config.max_ques_size = 0
|
||||
config.max_word_size = 0
|
||||
for data_set in data_sets:
|
||||
shared = data_set.shared
|
||||
config.max_sent_size = max(config.max_sent_size, shared['max_sent_size'])
|
||||
config.max_ques_size = max(config.max_ques_size, shared['max_ques_size'])
|
||||
config.max_word_size = max(config.max_word_size, shared['max_word_size'])
|
||||
config.max_num_sents = max(config.max_num_sents, shared['max_num_sents'])
|
||||
|
||||
config.max_word_size = min(config.max_word_size, config.word_size_th)
|
||||
|
||||
config.char_vocab_size = len(data_sets[0].shared['char2idx'])
|
||||
config.word_emb_size = len(next(iter(data_sets[0].shared['word2vec'].values())))
|
||||
config.word_vocab_size = len(data_sets[0].shared['word2idx'])
|
||||
|
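# An illustrative sketch of how this module is consumed (hypothetical driver; `config` fields
# such as batch_size, num_steps and cluster are assumptions):
train_data = read_data(config, 'train', ref=False)
dev_data = read_data(config, 'dev', ref=True)
update_config(config, [train_data, dev_data])
for batch_idxs, batch_ds in train_data.get_batches(config.batch_size, num_batches=config.num_steps,
                                                   shuffle=True, cluster=config.cluster):
    pass  # batch_ds is a DataSet; it is what Model.get_feed_dict expects as `batch`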
47
tensorflow/SQuAD/basic_cnn/superhighway.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import tensorflow as tf
|
||||
from tensorflow.python.ops.rnn_cell import RNNCell
|
||||
|
||||
from my.tensorflow.nn import linear
|
||||
|
||||
|
||||
class SHCell(RNNCell):
|
||||
"""
|
||||
Super-Highway Cell
|
||||
"""
|
||||
def __init__(self, input_size, logit_func='tri_linear', scalar=False):
|
||||
self._state_size = input_size
|
||||
self._output_size = input_size
|
||||
self._logit_func = logit_func
|
||||
self._scalar = scalar
|
||||
|
||||
@property
|
||||
def state_size(self):
|
||||
return self._state_size
|
||||
|
||||
@property
|
||||
def output_size(self):
|
||||
return self._output_size
|
||||
|
||||
def __call__(self, inputs, state, scope=None):
|
||||
with tf.variable_scope(scope or "SHCell"):
|
||||
a_size = 1 if self._scalar else self._state_size
|
||||
h, u = tf.split(axis=1, num_or_size_splits=2, value=inputs)
|
||||
if self._logit_func == 'mul_linear':
|
||||
args = [h * u, state * u]
|
||||
a = tf.nn.sigmoid(linear(args, a_size, True))
|
||||
elif self._logit_func == 'linear':
|
||||
args = [h, u, state]
|
||||
a = tf.nn.sigmoid(linear(args, a_size, True))
|
||||
elif self._logit_func == 'tri_linear':
|
||||
args = [h, u, state, h * u, state * u]
|
||||
a = tf.nn.sigmoid(linear(args, a_size, True))
|
||||
elif self._logit_func == 'double':
|
||||
args = [h, u, state]
|
||||
a = tf.nn.sigmoid(linear(tf.tanh(linear(args, a_size, True)), self._state_size, True))
|
||||
|
||||
else:
|
||||
raise Exception()
|
||||
new_state = a * state + (1 - a) * h
|
||||
outputs = state
|
||||
return outputs, new_state
|
||||
|
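# An assumed usage sketch for SHCell (how it is driven is not shown in this commit): each time
# step's input is the concatenation [h_t; u_t], which the cell splits back apart on axis 1.
# `h`, `u` ([N, J, d]), `x_len` ([N]) and the width `d` are hypothetical; dynamic_rnn is the
# wrapper defined in my/tensorflow/rnn.py.
cell = SHCell(d, logit_func='tri_linear')
inputs = tf.concat(axis=2, values=[h, u])   # [N, J, 2d]
outputs, final_state = dynamic_rnn(cell, inputs, sequence_length=x_len, dtype='float')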
76
tensorflow/SQuAD/basic_cnn/templates/visualizer.html
Normal file
|
@ -0,0 +1,76 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{{ title }}</title>
|
||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/chroma-js/1.1.1/chroma.min.js"></script>
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$(".att").each(function() {
|
||||
// var val = parseFloat($(this).text());
|
||||
var val = parseFloat($(this).attr("color"));
|
||||
var scale = chroma.scale(['white', 'red']);
|
||||
var color = scale(val).hex();
|
||||
$(this).attr("bgcolor", color);
|
||||
});
|
||||
})
|
||||
</script>
|
||||
</head>
|
||||
<style>
|
||||
table, th, td {border: 1px solid black}
|
||||
</style>
|
||||
<body>
|
||||
<h2>{{ title }}</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Question</th>
|
||||
<th>Answers</th>
|
||||
<th>Predicted</th>
|
||||
<th>Score</th>
|
||||
<th>Paragraph</th>
|
||||
</tr>
|
||||
{% for row in rows %}
|
||||
<tr>
|
||||
<td>{{ row.id }}</td>
|
||||
<td>
|
||||
{% for qj in row.ques %}
|
||||
{{ qj }}
|
||||
{% endfor %}
|
||||
</td>
|
||||
<td>
|
||||
{% for aa in row.a %}
|
||||
<li>{{ aa }}</li>
|
||||
{% endfor %}
|
||||
</td>
|
||||
<td>{{ row.ap }}</td>
|
||||
<td>{{ row.score }}</td>
|
||||
<td>
|
||||
<table>
|
||||
{% for xj, ypj, yp2j in zip(row.para, row.yp, row.yp2) %}
|
||||
<tr>
|
||||
{% set rowloop = loop %}
|
||||
{% for xjk, ypjk in zip(xj, ypj) %}
|
||||
<td class="att" color="{{ ypjk }}">
|
||||
{% if row.y[0][0] == rowloop.index0 and row.y[0][1] <= loop.index0 <= row.y[1][1] %}
|
||||
<b>{{ xjk }}</b>
|
||||
{% else %}
|
||||
{{ xjk }}
|
||||
{% endif %}
|
||||
</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
<tr>
|
||||
{% for xjk, yp2jk in zip(xj, yp2j) %}
|
||||
<td class="att" color="{{ yp2jk }}">-</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
73
tensorflow/SQuAD/basic_cnn/trainer.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
import tensorflow as tf
|
||||
|
||||
from basic_cnn.model import Model
|
||||
from my.tensorflow import average_gradients
|
||||
|
||||
|
||||
class Trainer(object):
|
||||
def __init__(self, config, model):
|
||||
assert isinstance(model, Model)
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.opt = tf.train.AdadeltaOptimizer(config.init_lr)
|
||||
self.loss = model.get_loss()
|
||||
self.var_list = model.get_var_list()
|
||||
self.global_step = model.get_global_step()
|
||||
self.summary = model.summary
|
||||
self.grads = self.opt.compute_gradients(self.loss, var_list=self.var_list)
|
||||
self.train_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)
|
||||
|
||||
def get_train_op(self):
|
||||
return self.train_op
|
||||
|
||||
def step(self, sess, batch, get_summary=False):
|
||||
assert isinstance(sess, tf.Session)
|
||||
_, ds = batch
|
||||
feed_dict = self.model.get_feed_dict(ds, True)
|
||||
if get_summary:
|
||||
loss, summary, train_op = \
|
||||
sess.run([self.loss, self.summary, self.train_op], feed_dict=feed_dict)
|
||||
else:
|
||||
loss, train_op = sess.run([self.loss, self.train_op], feed_dict=feed_dict)
|
||||
summary = None
|
||||
return loss, summary, train_op
|
||||
|
||||
|
||||
class MultiGPUTrainer(object):
|
||||
def __init__(self, config, models):
|
||||
model = models[0]
|
||||
assert isinstance(model, Model)
|
||||
self.config = config
|
||||
self.model = model
|
||||
self.opt = tf.train.AdadeltaOptimizer(config.init_lr)
|
||||
self.var_list = model.get_var_list()
|
||||
self.global_step = model.get_global_step()
|
||||
self.summary = model.summary
|
||||
self.models = models
|
||||
losses = []
|
||||
grads_list = []
|
||||
for gpu_idx, model in enumerate(models):
|
||||
with tf.name_scope("grads_{}".format(gpu_idx)), tf.device("/gpu:{}".format(gpu_idx)):
|
||||
loss = model.get_loss()
|
||||
grads = self.opt.compute_gradients(loss, var_list=self.var_list)
|
||||
losses.append(loss)
|
||||
grads_list.append(grads)
|
||||
|
||||
self.loss = tf.add_n(losses)/len(losses)
|
||||
self.grads = average_gradients(grads_list)
|
||||
self.train_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)
|
||||
|
||||
def step(self, sess, batches, get_summary=False):
|
||||
assert isinstance(sess, tf.Session)
|
||||
feed_dict = {}
|
||||
for batch, model in zip(batches, self.models):
|
||||
_, ds = batch
|
||||
feed_dict.update(model.get_feed_dict(ds, True))
|
||||
|
||||
if get_summary:
|
||||
loss, summary, train_op = \
|
||||
sess.run([self.loss, self.summary, self.train_op], feed_dict=feed_dict)
|
||||
else:
|
||||
loss, train_op = sess.run([self.loss, self.train_op], feed_dict=feed_dict)
|
||||
summary = None
|
||||
return loss, summary, train_op
|
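# A minimal single-GPU loop sketch for Trainer (illustrative; `config`, `sess`, and `train_data`
# are hypothetical names):
model = Model(config, scope="model_0")
trainer = Trainer(config, model)
sess.run(tf.global_variables_initializer())
for batch in train_data.get_batches(config.batch_size, shuffle=True):
    loss, _, _ = trainer.step(sess, batch)   # batch is a (batch_idxs, DataSet) pair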
137
tensorflow/SQuAD/basic_cnn/visualizer.py
Normal file
|
@ -0,0 +1,137 @@
|
|||
import shutil
|
||||
from collections import OrderedDict
|
||||
import http.server
|
||||
import socketserver
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
from basic_cnn.evaluator import get_span_score_pairs, get_best_span
|
||||
|
||||
|
||||
def bool_(string):
|
||||
if string == 'True':
|
||||
return True
|
||||
elif string == 'False':
|
||||
return False
|
||||
else:
|
||||
raise Exception()
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model_name", type=str, default='basic')
|
||||
parser.add_argument("--data_type", type=str, default='dev')
|
||||
parser.add_argument("--step", type=int, default=5000)
|
||||
parser.add_argument("--template_name", type=str, default="visualizer.html")
|
||||
parser.add_argument("--num_per_page", type=int, default=100)
|
||||
parser.add_argument("--data_dir", type=str, default="data/squad")
|
||||
parser.add_argument("--port", type=int, default=8000)
|
||||
parser.add_argument("--host", type=str, default="0.0.0.0")
|
||||
parser.add_argument("--open", type=str, default='False')
|
||||
parser.add_argument("--run_id", type=str, default="0")
|
||||
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def _decode(decoder, sent):
|
||||
return " ".join(decoder[idx] for idx in sent)
|
||||
|
||||
|
||||
def accuracy2_visualizer(args):
|
||||
model_name = args.model_name
|
||||
data_type = args.data_type
|
||||
num_per_page = args.num_per_page
|
||||
data_dir = args.data_dir
|
||||
run_id = args.run_id.zfill(2)
|
||||
step = args.step
|
||||
|
||||
eval_path = os.path.join("out", model_name, run_id, "eval", "{}-{}.json".format(data_type, str(step).zfill(6)))
|
||||
print("loading {}".format(eval_path))
|
||||
eval_ = json.load(open(eval_path, 'r'))
|
||||
|
||||
_id = 0
|
||||
html_dir = "/tmp/list_results%d" % _id
|
||||
while os.path.exists(html_dir):
|
||||
_id += 1
|
||||
html_dir = "/tmp/list_results%d" % _id
|
||||
|
||||
if os.path.exists(html_dir):
|
||||
shutil.rmtree(html_dir)
|
||||
os.mkdir(html_dir)
|
||||
|
||||
cur_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
templates_dir = os.path.join(cur_dir, 'templates')
|
||||
env = Environment(loader=FileSystemLoader(templates_dir))
|
||||
env.globals.update(zip=zip, reversed=reversed)
|
||||
template = env.get_template(args.template_name)
|
||||
|
||||
data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
|
||||
shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
|
||||
print("loading {}".format(data_path))
|
||||
data = json.load(open(data_path, 'r'))
|
||||
print("loading {}".format(shared_path))
|
||||
shared = json.load(open(shared_path, 'r'))
|
||||
|
||||
rows = []
|
||||
for i, (idx, yi, ypi, yp2i) in tqdm(enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2')])), total=len(eval_['idxs'])):
|
||||
id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
|
||||
x = shared['x'][rx[0]][rx[1]]
|
||||
ques = [" ".join(q)]
|
||||
para = [[word for word in sent] for sent in x]
|
||||
span = get_best_span(ypi, yp2i)
|
||||
ap = get_segment(para, span)
|
||||
score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])
|
||||
|
||||
row = {
|
||||
'id': id_,
|
||||
'title': "Hello world!",
|
||||
'ques': ques,
|
||||
'para': para,
|
||||
'y': yi[0][0],
|
||||
'y2': yi[0][1],
|
||||
'yp': ypi,
|
||||
'yp2': yp2i,
|
||||
'a': answers,
|
||||
'ap': ap,
|
||||
'score': score
|
||||
}
|
||||
rows.append(row)
|
||||
|
||||
if i % num_per_page == 0:
|
||||
html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))
|
||||
|
||||
if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
|
||||
var_dict = {'title': "Accuracy Visualization",
|
||||
'rows': rows
|
||||
}
|
||||
with open(html_path, "wb") as f:
|
||||
f.write(template.render(**var_dict).encode('UTF-8'))
|
||||
rows = []
|
||||
|
||||
os.chdir(html_dir)
|
||||
port = args.port
|
||||
host = args.host
|
||||
# Overriding to suppress log message
|
||||
class MyHandler(http.server.SimpleHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
handler = MyHandler
|
||||
httpd = socketserver.TCPServer((host, port), handler)
|
||||
if args.open == 'True':
|
||||
os.system("open http://%s:%d" % (args.host, args.port))
|
||||
print("serving at %s:%d" % (host, port))
|
||||
httpd.serve_forever()
|
||||
|
||||
|
||||
def get_segment(para, span):
|
||||
return " ".join(para[span[0][0]][span[0][1]:span[1][1]])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ARGS = get_args()
|
||||
accuracy2_visualizer(ARGS)
|
0
tensorflow/SQuAD/cnn_dm/__init__.py
Normal file
359
tensorflow/SQuAD/cnn_dm/eda.ipynb
Normal file
File diff suppressed because one or more lines are too long
38
tensorflow/SQuAD/cnn_dm/evaluate.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
root_dir = sys.argv[1]
|
||||
answer_path = sys.argv[2]
|
||||
file_names = os.listdir(root_dir)
|
||||
|
||||
num_correct = 0
|
||||
num_wrong = 0
|
||||
|
||||
with open(answer_path, 'r') as fh:
|
||||
id2answer_dict = json.load(fh)
|
||||
|
||||
for file_name in file_names:
|
||||
if not file_name.endswith(".question"):
|
||||
continue
|
||||
with open(os.path.join(root_dir, file_name), 'r') as fh:
|
||||
url = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
para = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
ques = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
answer = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
if file_name in id2answer_dict:
|
||||
pred = id2answer_dict[file_name]
|
||||
if pred == answer:
|
||||
num_correct += 1
|
||||
else:
|
||||
num_wrong += 1
|
||||
else:
|
||||
num_wrong += 1
|
||||
|
||||
total = num_correct + num_wrong
|
||||
acc = float(num_correct) / total
|
||||
print("{} = {} / {}".format(acc, num_correct, total))
|
185
tensorflow/SQuAD/cnn_dm/prepro.py
Normal file
|
@ -0,0 +1,185 @@
|
|||
import argparse
|
||||
import json
|
||||
import os
|
||||
# data: q, cq, (dq), (pq), y, *x, *cx
|
||||
# shared: x, cx, (dx), (px), word_counter, char_counter, word2vec
|
||||
# no metadata
|
||||
from collections import Counter
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from my.utils import process_tokens
|
||||
from squad.utils import get_word_span, process_tokens
|
||||
|
||||
|
||||
def bool_(arg):
|
||||
if arg == 'True':
|
||||
return True
|
||||
elif arg == 'False':
|
||||
return False
|
||||
raise Exception(arg)
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
prepro(args)
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
home = os.path.expanduser("~")
|
||||
source_dir = os.path.join(home, "data", "cnn", 'questions')
|
||||
target_dir = "data/cnn"
|
||||
glove_dir = os.path.join(home, "data", "glove")
|
||||
parser.add_argument("--source_dir", default=source_dir)
|
||||
parser.add_argument("--target_dir", default=target_dir)
|
||||
parser.add_argument("--glove_dir", default=glove_dir)
|
||||
parser.add_argument("--glove_corpus", default='6B')
|
||||
parser.add_argument("--glove_vec_size", default=100, type=int)
|
||||
parser.add_argument("--debug", default=False, type=bool_)
|
||||
parser.add_argument("--num_sents_th", default=200, type=int)
|
||||
parser.add_argument("--ques_size_th", default=30, type=int)
|
||||
parser.add_argument("--width", default=5, type=int)
|
||||
# TODO : put more args here
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def prepro(args):
|
||||
prepro_each(args, 'train')
|
||||
prepro_each(args, 'dev')
|
||||
prepro_each(args, 'test')
|
||||
|
||||
|
||||
def para2sents(para, width):
|
||||
"""
|
||||
Turn para into a double array of words (wordss),
|
||||
where each "sentence" is the window of up to `width` word neighbors on either side of an entity mention.
|
||||
:param para: space-separated paragraph string
|
||||
:param width: number of neighbor words kept on each side of an entity
|
||||
:return: list of word lists
|
||||
"""
|
||||
words = para.split(" ")
|
||||
sents = []
|
||||
for i, word in enumerate(words):
|
||||
if word.startswith("@"):
|
||||
start = max(i - width, 0)
|
||||
stop = min(i + width + 1, len(words))
|
||||
sent = words[start:stop]
|
||||
sents.append(sent)
|
||||
return sents
|
||||
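# A small worked example of para2sents (illustrative): with width=2, only the windows around
# @entity tokens are kept.
sents = para2sents("the film stars @entity1 in a leading role alongside @entity2", 2)
assert sents == [['film', 'stars', '@entity1', 'in', 'a'],
                 ['role', 'alongside', '@entity2']]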
|
||||
|
||||
def get_word2vec(args, word_counter):
|
||||
glove_path = os.path.join(args.glove_dir, "glove.{}.{}d.txt".format(args.glove_corpus, args.glove_vec_size))
|
||||
sizes = {'6B': int(4e5), '42B': int(1.9e6), '840B': int(2.2e6), '2B': int(1.2e6)}
|
||||
total = sizes[args.glove_corpus]
|
||||
word2vec_dict = {}
|
||||
with open(glove_path, 'r', encoding='utf-8') as fh:
|
||||
for line in tqdm(fh, total=total):
|
||||
array = line.lstrip().rstrip().split(" ")
|
||||
word = array[0]
|
||||
vector = list(map(float, array[1:]))
|
||||
if word in word_counter:
|
||||
word2vec_dict[word] = vector
|
||||
elif word.capitalize() in word_counter:
|
||||
word2vec_dict[word.capitalize()] = vector
|
||||
elif word.lower() in word_counter:
|
||||
word2vec_dict[word.lower()] = vector
|
||||
elif word.upper() in word_counter:
|
||||
word2vec_dict[word.upper()] = vector
|
||||
|
||||
print("{}/{} of word vocab have corresponding vectors in {}".format(len(word2vec_dict), len(word_counter), glove_path))
|
||||
return word2vec_dict
|
||||
|
||||
|
||||
def prepro_each(args, mode):
|
||||
source_dir = os.path.join(args.source_dir, mode)
|
||||
word_counter = Counter()
|
||||
lower_word_counter = Counter()
|
||||
ent_counter = Counter()
|
||||
char_counter = Counter()
|
||||
max_sent_size = 0
|
||||
max_word_size = 0
|
||||
max_ques_size = 0
|
||||
max_num_sents = 0
|
||||
|
||||
file_names = list(os.listdir(source_dir))
|
||||
if args.debug:
|
||||
file_names = file_names[:1000]
|
||||
lens = []
|
||||
|
||||
out_file_names = []
|
||||
for file_name in tqdm(file_names, total=len(file_names)):
|
||||
if file_name.endswith(".question"):
|
||||
with open(os.path.join(source_dir, file_name), 'r') as fh:
|
||||
url = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
para = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
ques = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
answer = fh.readline().strip()
|
||||
_ = fh.readline()
|
||||
cands = list(line.strip() for line in fh)
|
||||
cand_ents = list(cand.split(":")[0] for cand in cands)
|
||||
sents = para2sents(para, args.width)
|
||||
ques_words = ques.split(" ")
|
||||
|
||||
# Filtering
|
||||
if len(sents) > args.num_sents_th or len(ques_words) > args.ques_size_th:
|
||||
continue
|
||||
|
||||
max_sent_size = max(max(map(len, sents)), max_sent_size)
|
||||
max_ques_size = max(len(ques_words), max_ques_size)
|
||||
max_word_size = max(max(len(word) for sent in sents for word in sent), max_word_size)
|
||||
max_num_sents = max(len(sents), max_num_sents)
|
||||
|
||||
for word in ques_words:
|
||||
if word.startswith("@"):
|
||||
ent_counter[word] += 1
|
||||
word_counter[word] += 1
|
||||
else:
|
||||
word_counter[word] += 1
|
||||
lower_word_counter[word.lower()] += 1
|
||||
for c in word:
|
||||
char_counter[c] += 1
|
||||
for sent in sents:
|
||||
for word in sent:
|
||||
if word.startswith("@"):
|
||||
ent_counter[word] += 1
|
||||
word_counter[word] += 1
|
||||
else:
|
||||
word_counter[word] += 1
|
||||
lower_word_counter[word.lower()] += 1
|
||||
for c in word:
|
||||
char_counter[c] += 1
|
||||
|
||||
out_file_names.append(file_name)
|
||||
lens.append(len(sents))
|
||||
num_examples = len(out_file_names)
|
||||
|
||||
assert len(out_file_names) == len(lens)
|
||||
sorted_file_names, lens = zip(*sorted(zip(out_file_names, lens), key=lambda each: each[1]))
|
||||
assert lens[-1] == max_num_sents
|
||||
|
||||
word2vec_dict = get_word2vec(args, word_counter)
|
||||
lower_word2vec_dict = get_word2vec(args, lower_word_counter)
|
||||
|
||||
shared = {'word_counter': word_counter, 'ent_counter': ent_counter, 'char_counter': char_counter,
|
||||
'lower_word_counter': lower_word_counter,
|
||||
'max_num_sents': max_num_sents, 'max_sent_size': max_sent_size, 'max_word_size': max_word_size,
|
||||
'max_ques_size': max_ques_size,
|
||||
'word2vec': word2vec_dict, 'lower_word2vec': lower_word2vec_dict, 'sorted': sorted_file_names,
|
||||
'num_examples': num_examples}
|
||||
|
||||
print("max num sents: {}".format(max_num_sents))
|
||||
print("max ques size: {}".format(max_ques_size))
|
||||
|
||||
if not os.path.exists(args.target_dir):
|
||||
os.makedirs(args.target_dir)
|
||||
shared_path = os.path.join(args.target_dir, "shared_{}.json".format(mode))
|
||||
with open(shared_path, 'w') as fh:
|
||||
json.dump(shared, fh)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
25
tensorflow/SQuAD/download.sh
Executable file
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
DATA_DIR=$HOME/data
|
||||
mkdir -p $DATA_DIR
|
||||
|
||||
# Download SQuAD
|
||||
SQUAD_DIR=$DATA_DIR/squad
|
||||
mkdir -p $SQUAD_DIR
|
||||
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O $SQUAD_DIR/train-v1.1.json
|
||||
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -O $SQUAD_DIR/dev-v1.1.json
|
||||
|
||||
|
||||
# Download CNN and DailyMail
|
||||
# Download at: http://cs.nyu.edu/~kcho/DMQA/
|
||||
|
||||
|
||||
# Download GloVe
|
||||
GLOVE_DIR=$DATA_DIR/glove
|
||||
mkdir $GLOVE_DIR
|
||||
wget http://nlp.stanford.edu/data/glove.6B.zip -O $GLOVE_DIR/glove.6B.zip
|
||||
unzip $GLOVE_DIR/glove.6B.zip -d $GLOVE_DIR
|
||||
|
||||
# Download NLTK (for tokenizer)
|
||||
# Make sure that nltk is installed!
|
||||
python3 -m nltk.downloader -d $HOME/nltk_data punkt
|
0
tensorflow/SQuAD/my/__init__.py
Normal file
55
tensorflow/SQuAD/my/corenlp_interface.py
Normal file
|
@ -0,0 +1,55 @@
|
|||
import logging
|
||||
|
||||
import requests
|
||||
import nltk
|
||||
import json
|
||||
import networkx as nx
|
||||
import time
|
||||
|
||||
|
||||
class CoreNLPInterface(object):
|
||||
def __init__(self, url, port):
|
||||
self._url = url
|
||||
self._port = port
|
||||
|
||||
def get(self, type_, in_, num_max_requests=100):
|
||||
in_ = in_.encode("utf-8")
|
||||
url = "http://{}:{}/{}".format(self._url, self._port, type_)
|
||||
out = None
|
||||
for _ in range(num_max_requests):
|
||||
try:
|
||||
r = requests.post(url, data=in_)
|
||||
out = r.content.decode('utf-8')
|
||||
if out == 'error':
|
||||
out = None
|
||||
break
|
||||
except:
|
||||
time.sleep(1)
|
||||
return out
|
||||
|
||||
def split_doc(self, doc):
|
||||
out = self.get("doc", doc)
|
||||
return out if out is None else json.loads(out)
|
||||
|
||||
def split_sent(self, sent):
|
||||
out = self.get("sent", sent)
|
||||
return out if out is None else json.loads(out)
|
||||
|
||||
def get_dep(self, sent):
|
||||
out = self.get("dep", sent)
|
||||
return out if out is None else json.loads(out)
|
||||
|
||||
def get_const(self, sent):
|
||||
out = self.get("const", sent)
|
||||
return out
|
||||
|
||||
def get_const_tree(self, sent):
|
||||
out = self.get_const(sent)
|
||||
return out if out is None else nltk.tree.Tree.fromstring(out)
|
||||
|
||||
@staticmethod
|
||||
def dep2tree(dep):
|
||||
tree = nx.DiGraph()
|
||||
for dep, i, gov, j, label in dep:
|
||||
tree.add_edge(gov, dep, label=label)
|
||||
return tree
|
129
tensorflow/SQuAD/my/nltk_utils.py
Normal file
|
@ -0,0 +1,129 @@
|
|||
import nltk
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _set_span(t, i):
|
||||
if isinstance(t[0], str):
|
||||
t.span = (i, i+len(t))
|
||||
else:
|
||||
first = True
|
||||
for c in t:
|
||||
cur_span = _set_span(c, i)
|
||||
i = cur_span[1]
|
||||
if first:
|
||||
min_ = cur_span[0]
|
||||
first = False
|
||||
max_ = cur_span[1]
|
||||
t.span = (min_, max_)
|
||||
return t.span
|
||||
|
||||
|
||||
def set_span(t):
|
||||
assert isinstance(t, nltk.tree.Tree)
|
||||
try:
|
||||
return _set_span(t, 0)
|
||||
except:
|
||||
print(t)
|
||||
exit()
|
||||
|
||||
|
||||
def tree_contains_span(tree, span):
|
||||
"""
|
||||
Assumes that tree span has been set with set_span
|
||||
Returns True if any subtree of `tree` has exactly the given span.
|
||||
:param tree:
|
||||
:param span:
|
||||
:return bool:
|
||||
"""
|
||||
return span in set(t.span for t in tree.subtrees())
|
||||
|
||||
|
||||
def span_len(span):
|
||||
return span[1] - span[0]
|
||||
|
||||
|
||||
def span_overlap(s1, s2):
|
||||
start = max(s1[0], s2[0])
|
||||
stop = min(s1[1], s2[1])
|
||||
if stop > start:
|
||||
return start, stop
|
||||
return None
|
||||
|
||||
|
||||
def span_prec(true_span, pred_span):
|
||||
overlap = span_overlap(true_span, pred_span)
|
||||
if overlap is None:
|
||||
return 0
|
||||
return span_len(overlap) / span_len(pred_span)
|
||||
|
||||
|
||||
def span_recall(true_span, pred_span):
|
||||
overlap = span_overlap(true_span, pred_span)
|
||||
if overlap is None:
|
||||
return 0
|
||||
return span_len(overlap) / span_len(true_span)
|
||||
|
||||
|
||||
def span_f1(true_span, pred_span):
|
||||
p = span_prec(true_span, pred_span)
|
||||
r = span_recall(true_span, pred_span)
|
||||
if p == 0 or r == 0:
|
||||
return 0.0
|
||||
return 2 * p * r / (p + r)
|
||||
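# A quick numeric check of the span metrics above (illustrative): spans (2, 6) and (4, 9)
# overlap on (4, 6), which has length 2.
assert span_overlap((2, 6), (4, 9)) == (4, 6)
assert span_prec((2, 6), (4, 9)) == 2 / 5      # overlap 2 over predicted length 5
assert span_recall((2, 6), (4, 9)) == 2 / 4    # overlap 2 over true length 4
assert abs(span_f1((2, 6), (4, 9)) - 4 / 9) < 1e-9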
|
||||
|
||||
def find_max_f1_span(tree, span):
|
||||
return find_max_f1_subtree(tree, span).span
|
||||
|
||||
|
||||
def find_max_f1_subtree(tree, span):
|
||||
return max(((t, span_f1(span, t.span)) for t in tree.subtrees()), key=lambda p: p[1])[0]
|
||||
|
||||
|
||||
def tree2matrix(tree, node2num, row_size=None, col_size=None, dtype='int32'):
|
||||
set_span(tree)
|
||||
D = tree.height() - 1
|
||||
B = len(tree.leaves())
|
||||
row_size = row_size or D
|
||||
col_size = col_size or B
|
||||
matrix = np.zeros([row_size, col_size], dtype=dtype)
|
||||
mask = np.zeros([row_size, col_size, col_size], dtype='bool')
|
||||
|
||||
for subtree in tree.subtrees():
|
||||
row = subtree.height() - 2
|
||||
col = subtree.span[0]
|
||||
matrix[row, col] = node2num(subtree)
|
||||
for subsub in subtree.subtrees():
|
||||
if isinstance(subsub, nltk.tree.Tree):
|
||||
mask[row, col, subsub.span[0]] = True
|
||||
if not isinstance(subsub[0], nltk.tree.Tree):
|
||||
c = subsub.span[0]
|
||||
for r in range(row):
|
||||
mask[r, c, c] = True
|
||||
else:
|
||||
mask[row, col, col] = True
|
||||
|
||||
return matrix, mask
|
||||
|
||||
|
||||
def load_compressed_tree(s):
|
||||
|
||||
def compress_tree(tree):
|
||||
assert not isinstance(tree, str)
|
||||
if len(tree) == 1:
|
||||
if isinstance(tree[0], nltk.tree.Tree):
|
||||
return compress_tree(tree[0])
|
||||
else:
|
||||
return tree
|
||||
else:
|
||||
for i, t in enumerate(tree):
|
||||
if isinstance(t, nltk.tree.Tree):
|
||||
tree[i] = compress_tree(t)
|
||||
else:
|
||||
tree[i] = t
|
||||
return tree
|
||||
|
||||
return compress_tree(nltk.tree.Tree.fromstring(s))
|
||||
|
||||
|
||||
|
1
tensorflow/SQuAD/my/tensorflow/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from my.tensorflow.general import *
|
177
tensorflow/SQuAD/my/tensorflow/general.py
Normal file
|
@ -0,0 +1,177 @@
|
|||
from itertools import zip_longest
|
||||
|
||||
import itertools
|
||||
import tensorflow as tf
|
||||
from functools import reduce
|
||||
from operator import mul
|
||||
import numpy as np
|
||||
|
||||
VERY_BIG_NUMBER = 1e30
|
||||
VERY_SMALL_NUMBER = 1e-30
|
||||
VERY_POSITIVE_NUMBER = VERY_BIG_NUMBER
|
||||
VERY_NEGATIVE_NUMBER = -VERY_BIG_NUMBER
|
||||
|
||||
|
||||
def get_initializer(matrix):
|
||||
def _initializer(shape, dtype=None, partition_info=None, **kwargs): return matrix
|
||||
return _initializer
|
||||
|
||||
|
||||
def variable_on_cpu(name, shape, initializer):
|
||||
"""Helper to create a Variable stored on CPU memory.
|
||||
|
||||
Args:
|
||||
name: name of the variable
|
||||
shape: list of ints
|
||||
initializer: initializer for Variable
|
||||
|
||||
Returns:
|
||||
Variable Tensor
|
||||
"""
|
||||
with tf.device('/cpu:0'):
|
||||
var = tf.get_variable(name, shape, initializer=initializer)
|
||||
return var
|
||||
|
||||
|
||||
def variable_with_weight_decay(name, shape, stddev, wd):
|
||||
"""Helper to create an initialized Variable with weight decay.
|
||||
|
||||
Note that the Variable is initialized with a truncated normal distribution.
|
||||
A weight decay is added only if one is specified.
|
||||
|
||||
Args:
|
||||
name: name of the variable
|
||||
shape: list of ints
|
||||
stddev: standard deviation of a truncated Gaussian
|
||||
wd: add L2Loss weight decay multiplied by this float. If None, weight
|
||||
decay is not added for this Variable.
|
||||
|
||||
Returns:
|
||||
Variable Tensor
|
||||
"""
|
||||
var = variable_on_cpu(name, shape,
|
||||
tf.truncated_normal_initializer(stddev=stddev))
|
||||
if wd:
|
||||
weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
|
||||
tf.add_to_collection('losses', weight_decay)
|
||||
return var
|
||||
|
||||
|
||||
def average_gradients(tower_grads):
|
||||
"""Calculate the average gradient for each shared variable across all towers.
|
||||
|
||||
Note that this function provides a synchronization point across all towers.
|
||||
|
||||
Args:
|
||||
tower_grads: List of lists of (gradient, variable) tuples. The outer list
|
||||
is over individual gradients. The inner list is over the gradient
|
||||
calculation for each tower.
|
||||
Returns:
|
||||
List of pairs of (gradient, variable) where the gradient has been averaged
|
||||
across all towers.
|
||||
"""
|
||||
average_grads = []
|
||||
for grad_and_vars in zip(*tower_grads):
|
||||
# Note that each grad_and_vars looks like the following:
|
||||
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
|
||||
grads = []
|
||||
for g, var in grad_and_vars:
|
||||
# Add 0 dimension to the gradients to represent the tower.
|
||||
assert g is not None, var.name
|
||||
expanded_g = tf.expand_dims(g, 0)
|
||||
|
||||
# Append on a 'tower' dimension which we will average over below.
|
||||
grads.append(expanded_g)
|
||||
|
||||
# Average over the 'tower' dimension.
|
||||
grad = tf.concat(axis=0, values=grads)
|
||||
grad = tf.reduce_mean(grad, 0)
|
||||
|
||||
# Keep in mind that the Variables are redundant because they are shared
|
||||
# across towers. So .. we will just return the first tower's pointer to
|
||||
# the Variable.
|
||||
v = grad_and_vars[0][1]
|
||||
grad_and_var = (grad, v)
|
||||
average_grads.append(grad_and_var)
|
||||
return average_grads
|
||||
|
||||
|
||||
def mask(val, mask, name=None):
|
||||
if name is None:
|
||||
name = 'mask'
|
||||
return tf.multiply(val, tf.cast(mask, 'float'), name=name)
|
||||
|
||||
|
||||
def exp_mask(val, mask, name=None):
|
||||
Give a very negative number to the elements of val whose mask is False.
|
||||
For example, [-3, -2, 10], [True, True, False] -> [-3, -2, ~-1e30].
|
||||
Typically, this effectively masks in exponential space (e.g. softmax)
|
||||
Args:
|
||||
val: values to be masked
|
||||
mask: masking boolean tensor, same shape as val
|
||||
name: name for output tensor
|
||||
|
||||
Returns:
|
||||
Same shape as val, where some elements are very small (exponentially zero)
|
||||
"""
|
||||
if name is None:
|
||||
name = "exp_mask"
|
||||
return tf.add(val, (1 - tf.cast(mask, 'float')) * VERY_NEGATIVE_NUMBER, name=name)
|
||||
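# A numeric illustration (hypothetical values) of what exp_mask buys when followed by a softmax:
#   logits = [[1., 2., 3.]], mask = [[True, True, False]]
#   softmax(exp_mask(logits, mask)) ~= [[0.269, 0.731, 0.0]]
# i.e. the masked-out third position receives essentially no probability mass.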
|
||||
|
||||
def flatten(tensor, keep):
|
||||
fixed_shape = tensor.get_shape().as_list()
|
||||
start = len(fixed_shape) - keep
|
||||
left = reduce(mul, [fixed_shape[i] or tf.shape(tensor)[i] for i in range(start)])
|
||||
out_shape = [left] + [fixed_shape[i] or tf.shape(tensor)[i] for i in range(start, len(fixed_shape))]
|
||||
flat = tf.reshape(tensor, out_shape)
|
||||
return flat
|
||||
|
||||
|
||||
def reconstruct(tensor, ref, keep):
|
||||
ref_shape = ref.get_shape().as_list()
|
||||
tensor_shape = tensor.get_shape().as_list()
|
||||
ref_stop = len(ref_shape) - keep
|
||||
tensor_start = len(tensor_shape) - keep
|
||||
pre_shape = [ref_shape[i] or tf.shape(ref)[i] for i in range(ref_stop)]
|
||||
keep_shape = [tensor_shape[i] or tf.shape(tensor)[i] for i in range(tensor_start, len(tensor_shape))]
|
||||
# pre_shape = [tf.shape(ref)[i] for i in range(len(ref.get_shape().as_list()[:-keep]))]
|
||||
# keep_shape = tensor.get_shape().as_list()[-keep:]
|
||||
target_shape = pre_shape + keep_shape
|
||||
out = tf.reshape(tensor, target_shape)
|
||||
return out
|
||||
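# A shape-level sketch of the flatten/reconstruct pair (illustrative): flatten collapses all but
# the last `keep` dimensions so rank-limited ops can be applied, and reconstruct restores the
# leading dimensions from a reference tensor.
#   x: [N, M, JX, d]
#   flat = flatten(x, 1)        # [N*M*JX, d]
#   y = rank2_op(flat)          # hypothetical op that keeps rank 2, e.g. a dense layer
#   out = reconstruct(y, x, 1)  # [N, M, JX, d_out]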
|
||||
|
||||
def add_wd(wd, scope=None):
|
||||
scope = scope or tf.get_variable_scope().name
|
||||
variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
|
||||
with tf.name_scope("weight_decay"):
|
||||
for var in variables:
|
||||
weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name="{}/wd".format(var.op.name))
|
||||
tf.add_to_collection('losses', weight_decay)
|
||||
|
||||
|
||||
def grouper(iterable, n, fillvalue=None, shorten=False, num_groups=None):
|
||||
args = [iter(iterable)] * n
|
||||
out = zip_longest(*args, fillvalue=fillvalue)
|
||||
out = list(out)
|
||||
if num_groups is not None:
|
||||
default = (fillvalue, ) * n
|
||||
assert isinstance(num_groups, int)
|
||||
out = list(each for each, _ in zip_longest(out, range(num_groups), fillvalue=default))
|
||||
if shorten:
|
||||
assert fillvalue is None
|
||||
out = (tuple(e for e in each if e is not None) for each in out)
|
||||
return out
|
||||
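# A worked example of grouper's three modes (illustrative):
assert list(grouper(range(7), 3)) == [(0, 1, 2), (3, 4, 5), (6, None, None)]
assert list(grouper(range(7), 3, shorten=True)) == [(0, 1, 2), (3, 4, 5), (6,)]
assert list(grouper(range(4), 3, shorten=True, num_groups=3)) == [(0, 1, 2), (3,), ()]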
|
||||
def padded_reshape(tensor, shape, mode='CONSTANT', name=None):
|
||||
paddings = [[0, shape[i] - tf.shape(tensor)[i]] for i in range(len(shape))]
|
||||
return tf.pad(tensor, paddings, mode=mode, name=name)
|
||||
|
||||
|
||||
def get_num_params():
|
||||
num_params = 0
|
||||
for variable in tf.trainable_variables():
|
||||
shape = variable.get_shape()
|
||||
num_params += reduce(mul, [dim.value for dim in shape], 1)
|
||||
return num_params
|
180
tensorflow/SQuAD/my/tensorflow/nn.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
from tensorflow.python.ops.rnn_cell_impl import _linear
|
||||
from tensorflow.python.util import nest
|
||||
import tensorflow as tf
|
||||
|
||||
from my.tensorflow import flatten, reconstruct, add_wd, exp_mask
|
||||
|
||||
|
||||
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False, wd=0.0, input_keep_prob=1.0,
|
||||
is_train=None):
|
||||
if args is None or (nest.is_sequence(args) and not args):
|
||||
raise ValueError("`args` must be specified")
|
||||
if not nest.is_sequence(args):
|
||||
args = [args]
|
||||
|
||||
flat_args = [flatten(arg, 1) for arg in args]
|
||||
if input_keep_prob < 1.0:
|
||||
assert is_train is not None
|
||||
flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
|
||||
for arg in flat_args]
|
||||
with tf.variable_scope(scope or 'Linear'):
|
||||
flat_out = _linear(flat_args, output_size, bias, bias_initializer=tf.constant_initializer(bias_start))
|
||||
out = reconstruct(flat_out, args[0], 1)
|
||||
if squeeze:
|
||||
out = tf.squeeze(out, [len(args[0].get_shape().as_list())-1])
|
||||
if wd:
|
||||
add_wd(wd)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def dropout(x, keep_prob, is_train, noise_shape=None, seed=None, name=None):
|
||||
with tf.name_scope(name or "dropout"):
|
||||
if keep_prob < 1.0:
|
||||
d = tf.nn.dropout(x, keep_prob, noise_shape=noise_shape, seed=seed)
|
||||
out = tf.cond(is_train, lambda: d, lambda: x)
|
||||
return out
|
||||
return x
|
||||
|
||||
|
||||
def softmax(logits, mask=None, scope=None):
|
||||
with tf.name_scope(scope or "Softmax"):
|
||||
if mask is not None:
|
||||
logits = exp_mask(logits, mask)
|
||||
flat_logits = flatten(logits, 1)
|
||||
flat_out = tf.nn.softmax(flat_logits)
|
||||
out = reconstruct(flat_out, logits, 1)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def softsel(target, logits, mask=None, scope=None):
|
||||
"""
|
||||
|
||||
:param target: [ ..., J, d] dtype=float
|
||||
:param logits: [ ..., J], dtype=float
|
||||
:param mask: [ ..., J], dtype=bool
|
||||
:param scope:
|
||||
:return: [..., d], dtype=float
|
||||
"""
|
||||
with tf.name_scope(scope or "Softsel"):
|
||||
a = softmax(logits, mask=mask)
|
||||
target_rank = len(target.get_shape().as_list())
|
||||
out = tf.reduce_sum(tf.expand_dims(a, -1) * target, target_rank - 2)
|
||||
return out
|
||||
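# A shape sketch matching the a1i call in basic_cnn/model.py earlier in this commit (illustrative):
#   target = tf.reshape(g1, [N, M*JX, 2*d])   # [N, M*JX, 2d]
#   logits = tf.reshape(logits, [N, M*JX])    # [N, M*JX]
#   softsel(target, logits)                   # -> [N, 2d], an attention-weighted sum over positions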
|
||||
|
||||
def double_linear_logits(args, size, bias, bias_start=0.0, scope=None, mask=None, wd=0.0, input_keep_prob=1.0, is_train=None):
|
||||
with tf.variable_scope(scope or "Double_Linear_Logits"):
|
||||
first = tf.tanh(linear(args, size, bias, bias_start=bias_start, scope='first',
|
||||
wd=wd, input_keep_prob=input_keep_prob, is_train=is_train))
|
||||
second = linear(first, 1, bias, bias_start=bias_start, squeeze=True, scope='second',
|
||||
wd=wd, input_keep_prob=input_keep_prob, is_train=is_train)
|
||||
if mask is not None:
|
||||
second = exp_mask(second, mask)
|
||||
return second
|
||||
|
||||
|
||||
def linear_logits(args, bias, bias_start=0.0, scope=None, mask=None, wd=0.0, input_keep_prob=1.0, is_train=None):
|
||||
with tf.variable_scope(scope or "Linear_Logits"):
|
||||
logits = linear(args, 1, bias, bias_start=bias_start, squeeze=True, scope='first',
|
||||
wd=wd, input_keep_prob=input_keep_prob, is_train=is_train)
|
||||
if mask is not None:
|
||||
logits = exp_mask(logits, mask)
|
||||
return logits
|
||||
|
||||
|
||||
def sum_logits(args, mask=None, name=None):
|
||||
with tf.name_scope(name or "sum_logits"):
|
||||
if args is None or (nest.is_sequence(args) and not args):
|
||||
raise ValueError("`args` must be specified")
|
||||
if not nest.is_sequence(args):
|
||||
args = [args]
|
||||
rank = len(args[0].get_shape())
|
||||
logits = sum(tf.reduce_sum(arg, rank-1) for arg in args)
|
||||
if mask is not None:
|
||||
logits = exp_mask(logits, mask)
|
||||
return logits
|
||||
|
||||
|
||||
def get_logits(args, size, bias, bias_start=0.0, scope=None, mask=None, wd=0.0, input_keep_prob=1.0, is_train=None, func=None):
|
||||
if func is None:
|
||||
func = "sum"
|
||||
if func == 'sum':
|
||||
return sum_logits(args, mask=mask, name=scope)
|
||||
elif func == 'linear':
|
||||
return linear_logits(args, bias, bias_start=bias_start, scope=scope, mask=mask, wd=wd, input_keep_prob=input_keep_prob,
|
||||
is_train=is_train)
|
||||
elif func == 'double':
|
||||
return double_linear_logits(args, size, bias, bias_start=bias_start, scope=scope, mask=mask, wd=wd, input_keep_prob=input_keep_prob,
|
||||
is_train=is_train)
|
||||
elif func == 'dot':
|
||||
assert len(args) == 2
|
||||
arg = args[0] * args[1]
|
||||
return sum_logits([arg], mask=mask, name=scope)
|
||||
elif func == 'mul_linear':
|
||||
assert len(args) == 2
|
||||
arg = args[0] * args[1]
|
||||
return linear_logits([arg], bias, bias_start=bias_start, scope=scope, mask=mask, wd=wd, input_keep_prob=input_keep_prob,
|
||||
is_train=is_train)
|
||||
elif func == 'proj':
|
||||
assert len(args) == 2
|
||||
d = args[1].get_shape()[-1]
|
||||
proj = linear([args[0]], d, False, bias_start=bias_start, scope=scope, wd=wd, input_keep_prob=input_keep_prob,
|
||||
is_train=is_train)
|
||||
return sum_logits([proj * args[1]], mask=mask)
|
||||
elif func == 'tri_linear':
|
||||
assert len(args) == 2
|
||||
new_arg = args[0] * args[1]
|
||||
return linear_logits([args[0], args[1], new_arg], bias, bias_start=bias_start, scope=scope, mask=mask, wd=wd, input_keep_prob=input_keep_prob,
|
||||
is_train=is_train)
|
||||
else:
|
||||
raise Exception()
|
||||
|
||||
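# For reference (illustrative): the 'tri_linear' branch, which the model above selects through
# config.answer_func, reduces to a single linear score over a concatenation:
#   with args = [h, u]:  logits = w^T [h; u; h * u] + b   (w, b from the inner `linear` call)
# and any boolean mask is then applied in exponential space via exp_mask.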
|
||||
def highway_layer(arg, bias, bias_start=0.0, scope=None, wd=0.0, input_keep_prob=1.0, is_train=None):
|
||||
with tf.variable_scope(scope or "highway_layer"):
|
||||
d = arg.get_shape()[-1]
|
||||
trans = linear([arg], d, bias, bias_start=bias_start, scope='trans', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train)
|
||||
trans = tf.nn.relu(trans)
|
||||
gate = linear([arg], d, bias, bias_start=bias_start, scope='gate', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train)
|
||||
gate = tf.nn.sigmoid(gate)
|
||||
out = gate * trans + (1 - gate) * arg
|
||||
return out
|
||||
|
||||
|
||||
def highway_network(arg, num_layers, bias, bias_start=0.0, scope=None, wd=0.0, input_keep_prob=1.0, is_train=None):
|
||||
with tf.variable_scope(scope or "highway_network"):
|
||||
prev = arg
|
||||
cur = None
|
||||
for layer_idx in range(num_layers):
|
||||
cur = highway_layer(prev, bias, bias_start=bias_start, scope="layer_{}".format(layer_idx), wd=wd,
|
||||
input_keep_prob=input_keep_prob, is_train=is_train)
|
||||
prev = cur
|
||||
return cur
|
||||
|
||||
|
||||
def conv1d(in_, filter_size, height, padding, is_train=None, keep_prob=1.0, scope=None):
|
||||
with tf.variable_scope(scope or "conv1d"):
|
||||
num_channels = in_.get_shape()[-1]
|
||||
filter_ = tf.get_variable("filter", shape=[1, height, num_channels, filter_size], dtype='float')
|
||||
bias = tf.get_variable("bias", shape=[filter_size], dtype='float')
|
||||
strides = [1, 1, 1, 1]
|
||||
if is_train is not None and keep_prob < 1.0:
|
||||
in_ = dropout(in_, keep_prob, is_train)
|
||||
xxc = tf.nn.conv2d(in_, filter_, strides, padding) + bias # [N*M, JX, W/filter_stride, d]
|
||||
out = tf.reduce_max(tf.nn.relu(xxc), 2) # [-1, JX, d]
|
||||
return out
|
||||
|
||||
|
||||
def multi_conv1d(in_, filter_sizes, heights, padding, is_train=None, keep_prob=1.0, scope=None):
|
||||
with tf.variable_scope(scope or "multi_conv1d"):
|
||||
assert len(filter_sizes) == len(heights)
|
||||
outs = []
|
||||
for filter_size, height in zip(filter_sizes, heights):
|
||||
if filter_size == 0:
|
||||
continue
|
||||
out = conv1d(in_, filter_size, height, padding, is_train=is_train, keep_prob=keep_prob, scope="conv1d_{}".format(height))
|
||||
outs.append(out)
|
||||
concat_out = tf.concat(axis=2, values=outs)
|
||||
return concat_out
|
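The softsel/exp_mask pattern used throughout nn.py (add a very large negative constant to masked positions before the softmax, then take the weighted sum of the targets) is easiest to see outside the TF graph. Below is a minimal NumPy sketch of that computation; the toy shapes and the VERY_NEGATIVE_NUMBER constant are assumptions for illustration, not code from this commit.

import numpy as np

VERY_NEGATIVE_NUMBER = -1e30  # assumed constant; exp_mask in this repo uses a similar value


def masked_softsel(target, logits, mask):
    """NumPy sketch of softsel: masked softmax over logits, then weighted sum of target.

    target: [J, d], logits: [J], mask: [J] of {0, 1}
    """
    masked_logits = logits + (1 - mask) * VERY_NEGATIVE_NUMBER  # kill masked positions
    a = np.exp(masked_logits - masked_logits.max())
    a = a / a.sum()                                             # softmax weights, ~0 where masked
    return (a[:, None] * target).sum(axis=0)                    # [d]


target = np.arange(12, dtype=float).reshape(4, 3)    # 4 memory slots, d=3
logits = np.array([0.1, 2.0, 0.3, 5.0])
mask = np.array([1, 1, 1, 0])                        # last slot is padding
print(masked_softsel(target, logits, mask))          # ignores the padded slot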
81 tensorflow/SQuAD/my/tensorflow/rnn.py Normal file
@@ -0,0 +1,81 @@
import tensorflow as tf
from tensorflow.python.ops.rnn import dynamic_rnn as _dynamic_rnn, \
    bidirectional_dynamic_rnn as _bidirectional_dynamic_rnn

from my.tensorflow import flatten, reconstruct


def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    assert not time_major  # TODO : to be implemented later!
    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    flat_outputs, final_state = _dynamic_rnn(cell, flat_inputs, sequence_length=flat_len,
                                             initial_state=initial_state, dtype=dtype,
                                             parallel_iterations=parallel_iterations, swap_memory=swap_memory,
                                             time_major=time_major, scope=scope)

    outputs = reconstruct(flat_outputs, inputs, 2)
    return outputs, final_state


def bw_dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
                   dtype=None, parallel_iterations=None, swap_memory=False,
                   time_major=False, scope=None):
    assert not time_major  # TODO : to be implemented later!

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    flat_inputs = tf.reverse(flat_inputs, 1) if sequence_length is None \
        else tf.reverse_sequence(flat_inputs, sequence_length, 1)
    flat_outputs, final_state = _dynamic_rnn(cell, flat_inputs, sequence_length=flat_len,
                                             initial_state=initial_state, dtype=dtype,
                                             parallel_iterations=parallel_iterations, swap_memory=swap_memory,
                                             time_major=time_major, scope=scope)
    flat_outputs = tf.reverse(flat_outputs, 1) if sequence_length is None \
        else tf.reverse_sequence(flat_outputs, sequence_length, 1)

    outputs = reconstruct(flat_outputs, inputs, 2)
    return outputs, final_state


def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    assert not time_major

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs, sequence_length=flat_len,
                                   initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw,
                                   dtype=dtype, parallel_iterations=parallel_iterations, swap_memory=swap_memory,
                                   time_major=time_major, scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state


def bidirectional_rnn(cell_fw, cell_bw, inputs,
                      initial_state_fw=None, initial_state_bw=None,
                      dtype=None, sequence_length=None, scope=None):

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs, sequence_length=flat_len,
                                        initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw,
                                        dtype=dtype, scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state
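These wrappers delegate the real work to flatten and reconstruct (defined elsewhere in my.tensorflow, not shown in this hunk): every leading batch-like dimension is folded into one so the stock dynamic_rnn sees a rank-3 tensor, and the outputs are folded back afterwards. A rough NumPy sketch of that reshape bookkeeping, assuming inputs shaped [N, M, JX, d]:

import numpy as np

N, M, JX, d, d_out = 2, 3, 5, 4, 7          # assumed toy sizes
inputs = np.random.rand(N, M, JX, d)

# "flatten(inputs, 2)": keep the last 2 dims, fold everything before them together
flat_inputs = inputs.reshape(-1, JX, d)     # [N*M, JX, d]

# stand-in for the RNN: any op mapping [N*M, JX, d] -> [N*M, JX, d_out]
flat_outputs = np.tanh(flat_inputs @ np.random.rand(d, d_out))

# "reconstruct(flat_outputs, inputs, 2)": restore the original leading dims
outputs = flat_outputs.reshape(inputs.shape[:-2] + flat_outputs.shape[-2:])
print(outputs.shape)                        # (2, 3, 5, 7)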
223 tensorflow/SQuAD/my/tensorflow/rnn_cell.py Normal file
@@ -0,0 +1,223 @@
import tensorflow as tf
|
||||
from tensorflow.contrib.rnn import DropoutWrapper, RNNCell, LSTMStateTuple
|
||||
|
||||
from my.tensorflow import exp_mask, flatten
|
||||
from my.tensorflow.nn import linear, softsel, double_linear_logits
|
||||
|
||||
|
||||
class SwitchableDropoutWrapper(DropoutWrapper):
|
||||
def __init__(self, cell, is_train, input_keep_prob=1.0, output_keep_prob=1.0,
|
||||
seed=None):
|
||||
super(SwitchableDropoutWrapper, self).__init__(cell, input_keep_prob=input_keep_prob, output_keep_prob=output_keep_prob,
|
||||
seed=seed)
|
||||
self.is_train = is_train
|
||||
|
||||
def __call__(self, inputs, state, scope=None):
|
||||
outputs_do, new_state_do = super(SwitchableDropoutWrapper, self).__call__(inputs, state, scope=scope)
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
outputs, new_state = self._cell(inputs, state, scope)
|
||||
outputs = tf.cond(self.is_train, lambda: outputs_do, lambda: outputs)
|
||||
if isinstance(state, tuple):
|
||||
new_state = state.__class__(*[tf.cond(self.is_train, lambda: new_state_do_i, lambda: new_state_i)
|
||||
for new_state_do_i, new_state_i in zip(new_state_do, new_state)])
|
||||
else:
|
||||
new_state = tf.cond(self.is_train, lambda: new_state_do, lambda: new_state)
|
||||
return outputs, new_state
|
||||
|
||||
|
||||
class TreeRNNCell(RNNCell):
|
||||
def __init__(self, cell, input_size, reduce_func):
|
||||
self._cell = cell
|
||||
self._input_size = input_size
|
||||
self._reduce_func = reduce_func
|
||||
|
||||
def __call__(self, inputs, state, scope=None):
|
||||
"""
|
||||
:param inputs: [N*B, I + B]
|
||||
:param state: [N*B, d]
|
||||
:param scope:
|
||||
:return: [N*B, d]
|
||||
"""
|
||||
with tf.variable_scope(scope or self.__class__.__name__):
|
||||
d = self.state_size
|
||||
x = tf.slice(inputs, [0, 0], [-1, self._input_size]) # [N*B, I]
|
||||
mask = tf.slice(inputs, [0, self._input_size], [-1, -1]) # [N*B, B]
|
||||
B = tf.shape(mask)[1]
|
||||
prev_state = tf.expand_dims(tf.reshape(state, [-1, B, d]), 1) # [N, B, d] -> [N, 1, B, d]
|
||||
mask = tf.tile(tf.expand_dims(tf.reshape(mask, [-1, B, B]), -1), [1, 1, 1, d]) # [N, B, B, d]
|
||||
# prev_state = self._reduce_func(tf.tile(prev_state, [1, B, 1, 1]), 2)
|
||||
prev_state = self._reduce_func(exp_mask(prev_state, mask), 2) # [N, B, d]
|
||||
prev_state = tf.reshape(prev_state, [-1, d]) # [N*B, d]
|
||||
return self._cell(x, prev_state)
|
||||
|
||||
@property
|
||||
def state_size(self):
|
||||
return self._cell.state_size
|
||||
|
||||
@property
|
||||
def output_size(self):
|
||||
return self._cell.output_size
|
||||
|
||||
|
||||
class NoOpCell(RNNCell):
|
||||
def __init__(self, num_units):
|
||||
self._num_units = num_units
|
||||
|
||||
def __call__(self, inputs, state, scope=None):
|
||||
return state, state
|
||||
|
||||
@property
|
||||
def state_size(self):
|
||||
return self._num_units
|
||||
|
||||
@property
|
||||
def output_size(self):
|
||||
return self._num_units
|
||||
|
||||
|
||||
class MatchCell(RNNCell):
|
||||
def __init__(self, cell, input_size, q_len):
|
||||
self._cell = cell
|
||||
self._input_size = input_size
|
||||
# FIXME : This won't be needed with good shape guessing
|
||||
self._q_len = q_len
|
||||
|
||||
@property
|
||||
def state_size(self):
|
||||
return self._cell.state_size
|
||||
|
||||
@property
|
||||
def output_size(self):
|
||||
return self._cell.output_size
|
||||
|
||||
def __call__(self, inputs, state, scope=None):
|
||||
"""
|
||||
|
||||
:param inputs: [N, d + JQ + JQ * d]
|
||||
:param state: [N, d]
|
||||
:param scope:
|
||||
:return:
|
||||
"""
|
||||
with tf.variable_scope(scope or self.__class__.__name__):
|
||||
c_prev, h_prev = state
|
||||
x = tf.slice(inputs, [0, 0], [-1, self._input_size])
|
||||
q_mask = tf.slice(inputs, [0, self._input_size], [-1, self._q_len]) # [N, JQ]
|
||||
qs = tf.slice(inputs, [0, self._input_size + self._q_len], [-1, -1])
|
||||
qs = tf.reshape(qs, [-1, self._q_len, self._input_size]) # [N, JQ, d]
|
||||
x_tiled = tf.tile(tf.expand_dims(x, 1), [1, self._q_len, 1]) # [N, JQ, d]
|
||||
h_prev_tiled = tf.tile(tf.expand_dims(h_prev, 1), [1, self._q_len, 1]) # [N, JQ, d]
|
||||
f = tf.tanh(linear([qs, x_tiled, h_prev_tiled], self._input_size, True, scope='f')) # [N, JQ, d]
|
||||
a = tf.nn.softmax(exp_mask(linear(f, 1, True, squeeze=True, scope='a'), q_mask)) # [N, JQ]
|
||||
q = tf.reduce_sum(qs * tf.expand_dims(a, -1), 1)
|
||||
z = tf.concat(axis=1, values=[x, q]) # [N, 2d]
|
||||
return self._cell(z, state)
|
||||
|
||||
|
||||
class AttentionCell(RNNCell):
|
||||
def __init__(self, cell, memory, mask=None, controller=None, mapper=None, input_keep_prob=1.0, is_train=None):
|
||||
"""
|
||||
Early fusion attention cell: uses the (inputs, state) to control the current attention.
|
||||
|
||||
:param cell:
|
||||
:param memory: [N, M, m]
|
||||
:param mask:
|
||||
:param controller: (inputs, prev_state, memory) -> memory_logits
|
||||
"""
|
||||
self._cell = cell
|
||||
self._memory = memory
|
||||
self._mask = mask
|
||||
self._flat_memory = flatten(memory, 2)
|
||||
self._flat_mask = flatten(mask, 1)
|
||||
if controller is None:
|
||||
controller = AttentionCell.get_linear_controller(True, is_train=is_train)
|
||||
self._controller = controller
|
||||
if mapper is None:
|
||||
mapper = AttentionCell.get_concat_mapper()
|
||||
elif mapper == 'sim':
|
||||
mapper = AttentionCell.get_sim_mapper()
|
||||
self._mapper = mapper
|
||||
|
||||
@property
|
||||
def state_size(self):
|
||||
return self._cell.state_size
|
||||
|
||||
@property
|
||||
def output_size(self):
|
||||
return self._cell.output_size
|
||||
|
||||
def __call__(self, inputs, state, scope=None):
|
||||
with tf.variable_scope(scope or "AttentionCell"):
|
||||
memory_logits = self._controller(inputs, state, self._flat_memory)
|
||||
sel_mem = softsel(self._flat_memory, memory_logits, mask=self._flat_mask) # [N, m]
|
||||
new_inputs, new_state = self._mapper(inputs, state, sel_mem)
|
||||
return self._cell(new_inputs, state)
|
||||
|
||||
@staticmethod
|
||||
def get_double_linear_controller(size, bias, input_keep_prob=1.0, is_train=None):
|
||||
def double_linear_controller(inputs, state, memory):
|
||||
"""
|
||||
|
||||
:param inputs: [N, i]
|
||||
:param state: [N, d]
|
||||
:param memory: [N, M, m]
|
||||
:return: [N, M]
|
||||
"""
|
||||
rank = len(memory.get_shape())
|
||||
_memory_size = tf.shape(memory)[rank-2]
|
||||
tiled_inputs = tf.tile(tf.expand_dims(inputs, 1), [1, _memory_size, 1])
|
||||
if isinstance(state, tuple):
|
||||
tiled_states = [tf.tile(tf.expand_dims(each, 1), [1, _memory_size, 1])
|
||||
for each in state]
|
||||
else:
|
||||
tiled_states = [tf.tile(tf.expand_dims(state, 1), [1, _memory_size, 1])]
|
||||
|
||||
# [N, M, d]
|
||||
in_ = tf.concat([tiled_inputs] + tiled_states + [memory], axis=2)
|
||||
out = double_linear_logits(in_, size, bias, input_keep_prob=input_keep_prob,
|
||||
is_train=is_train)
|
||||
return out
|
||||
return double_linear_controller
|
||||
|
||||
@staticmethod
|
||||
def get_linear_controller(bias, input_keep_prob=1.0, is_train=None):
|
||||
def linear_controller(inputs, state, memory):
|
||||
rank = len(memory.get_shape())
|
||||
_memory_size = tf.shape(memory)[rank-2]
|
||||
tiled_inputs = tf.tile(tf.expand_dims(inputs, 1), [1, _memory_size, 1])
|
||||
if isinstance(state, tuple):
|
||||
tiled_states = [tf.tile(tf.expand_dims(each, 1), [1, _memory_size, 1])
|
||||
for each in state]
|
||||
else:
|
||||
tiled_states = [tf.tile(tf.expand_dims(state, 1), [1, _memory_size, 1])]
|
||||
|
||||
# [N, M, d]
|
||||
in_ = tf.concat([tiled_inputs] + tiled_states + [memory], axis=2)
|
||||
out = linear(in_, 1, bias, squeeze=True, input_keep_prob=input_keep_prob, is_train=is_train)
|
||||
return out
|
||||
return linear_controller
|
||||
|
||||
@staticmethod
|
||||
def get_concat_mapper():
|
||||
def concat_mapper(inputs, state, sel_mem):
|
||||
"""
|
||||
|
||||
:param inputs: [N, i]
|
||||
:param state: [N, d]
|
||||
:param sel_mem: [N, m]
|
||||
:return: (new_inputs, new_state) tuple
|
||||
"""
|
||||
return tf.concat(axis=1, values=[inputs, sel_mem]), state
|
||||
return concat_mapper
|
||||
|
||||
@staticmethod
|
||||
def get_sim_mapper():
|
||||
def sim_mapper(inputs, state, sel_mem):
|
||||
"""
|
||||
Assume that inputs and sel_mem are the same size
|
||||
:param inputs: [N, i]
|
||||
:param state: [N, d]
|
||||
:param sel_mem: [N, i]
|
||||
:return: (new_inputs, new_state) tuple
|
||||
"""
|
||||
return tf.concat(axis=1, values=[inputs, sel_mem, inputs * sel_mem, tf.abs(inputs - sel_mem)]), state
|
||||
return sim_mapper
|
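AttentionCell above performs early fusion: at each step the controller scores the memory against the current input and state, softsel turns those scores into a weighted memory read, and the mapper concatenates that read with the input before calling the wrapped cell. A standalone NumPy sketch of one such step follows; the sizes and the random controller weights are assumptions for illustration, not values from this commit.

import numpy as np

rng = np.random.default_rng(0)
N, M, m, i, d = 2, 4, 6, 5, 3                  # assumed sizes: batch, memory slots, memory dim, input dim, state dim
memory = rng.standard_normal((N, M, m))
mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]])  # padded memory slots
inputs = rng.standard_normal((N, i))
state = rng.standard_normal((N, d))

# linear controller: tile inputs/state across memory slots, score each slot with one linear layer
tiled = np.concatenate([np.repeat(inputs[:, None, :], M, axis=1),
                        np.repeat(state[:, None, :], M, axis=1),
                        memory], axis=-1)      # [N, M, i + d + m]
w = rng.standard_normal(tiled.shape[-1])
memory_logits = tiled @ w                      # [N, M]

# softsel: masked softmax over slots, weighted sum of memory
logits = np.where(mask == 1, memory_logits, -1e30)
a = np.exp(logits - logits.max(axis=1, keepdims=True))
a /= a.sum(axis=1, keepdims=True)
sel_mem = (a[..., None] * memory).sum(axis=1)  # [N, m]

# concat mapper: feed [inputs; sel_mem] to the wrapped cell
new_inputs = np.concatenate([inputs, sel_mem], axis=1)
print(new_inputs.shape)                        # (2, 11)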
58 tensorflow/SQuAD/my/utils.py Normal file
@@ -0,0 +1,58 @@
import json
from collections import deque

import numpy as np
from tqdm import tqdm


def mytqdm(list_, desc="", show=True):
    if show:
        pbar = tqdm(list_)
        pbar.set_description(desc)
        return pbar
    return list_


def json_pretty_dump(obj, fh):
    return json.dump(obj, fh, sort_keys=True, indent=2, separators=(',', ': '))


def index(l, i):
    return index(l[i[0]], i[1:]) if len(i) > 1 else l[i[0]]


def fill(l, shape, dtype=None):
    out = np.zeros(shape, dtype=dtype)
    stack = deque()
    stack.appendleft(((), l))
    while len(stack) > 0:
        indices, cur = stack.pop()
        if len(indices) < shape:
            for i, sub in enumerate(cur):
                stack.appendleft([indices + (i,), sub])
        else:
            out[indices] = cur
    return out


def short_floats(o, precision):
    class ShortFloat(float):
        def __repr__(self):
            return '%.{}g'.format(precision) % self

    def _short_floats(obj):
        if isinstance(obj, float):
            return ShortFloat(obj)
        elif isinstance(obj, dict):
            return dict((k, _short_floats(v)) for k, v in obj.items())
        elif isinstance(obj, (list, tuple)):
            return tuple(map(_short_floats, obj))
        return obj

    return _short_floats(o)


def argmax(x):
    return np.unravel_index(x.argmax(), x.shape)
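The helpers in my/utils.py are used for pointer decoding and logging elsewhere in the repo. A small usage sketch, assuming tensorflow/SQuAD is on PYTHONPATH so the package-local module is importable:

import numpy as np
from my.utils import argmax, index

scores = np.array([[0.1, 0.7, 0.2],
                   [0.3, 0.1, 0.9]])
print(argmax(scores))                    # (1, 2): row/column of the largest score
print(index([[1, 2], [3, 4]], (1, 0)))   # 3: nested list lookup with a tuple of indices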
50 tensorflow/SQuAD/my/zip_save.py Normal file
@@ -0,0 +1,50 @@
import argparse
import os

import shutil
from zipfile import ZipFile

from tqdm import tqdm


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('paths', nargs='+')
    parser.add_argument('-o', '--out', default='save.zip')
    args = parser.parse_args()
    return args


def zip_save(args):
    temp_dir = "."
    save_dir = os.path.join(temp_dir, "save")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for save_source_path in tqdm(args.paths):
        # path = "out/basic/30/save/basic-18000"
        # target_path = "save_dir/30/save"
        # also output full path name to "save_dir/30/readme.txt"
        # need to also extract "out/basic/30/shared.json"
        temp, _ = os.path.split(save_source_path)  # "out/basic/30/save", _
        model_dir, _ = os.path.split(temp)  # "out/basic/30", _
        _, model_name = os.path.split(model_dir)
        cur_dir = os.path.join(save_dir, model_name)
        if not os.path.exists(cur_dir):
            os.makedirs(cur_dir)
        save_target_path = os.path.join(cur_dir, "save")
        shared_target_path = os.path.join(cur_dir, "shared.json")
        readme_path = os.path.join(cur_dir, "readme.txt")
        shared_source_path = os.path.join(model_dir, "shared.json")
        shutil.copy(save_source_path, save_target_path)
        shutil.copy(shared_source_path, shared_target_path)
        with open(readme_path, 'w') as fh:
            fh.write(save_source_path)

    os.system("zip {} -r {}".format(args.out, save_dir))


def main():
    args = get_args()
    zip_save(args)


if __name__ == "__main__":
    main()
3 tensorflow/SQuAD/requirements.txt Normal file
@@ -0,0 +1,3 @@
nltk
tqdm
jinja2
1 tensorflow/SQuAD/run_training.sh Executable file
@@ -0,0 +1 @@
python3 -m basic.cli --mode train --noload --len_opt --cluster
0 tensorflow/SQuAD/squad/__init__.py Normal file
157 tensorflow/SQuAD/squad/aug_squad.py Normal file
@@ -0,0 +1,157 @@
import json
|
||||
import sys
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from my.corenlp_interface import CoreNLPInterface
|
||||
|
||||
in_path = sys.argv[1]
|
||||
out_path = sys.argv[2]
|
||||
url = sys.argv[3]
|
||||
port = int(sys.argv[4])
|
||||
data = json.load(open(in_path, 'r'))
|
||||
|
||||
h = CoreNLPInterface(url, port)
|
||||
|
||||
|
||||
def find_all(a_str, sub):
|
||||
start = 0
|
||||
while True:
|
||||
start = a_str.find(sub, start)
|
||||
if start == -1: return
|
||||
yield start
|
||||
start += len(sub) # use start += 1 to find overlapping matches
|
||||
|
||||
|
||||
def to_hex(s):
|
||||
return " ".join(map(hex, map(ord, s)))
|
||||
|
||||
|
||||
def handle_nobreak(cand, text):
|
||||
if cand == text:
|
||||
return cand
|
||||
if cand.replace(u'\u00A0', ' ') == text:
|
||||
return cand
|
||||
elif cand == text.replace(u'\u00A0', ' '):
|
||||
return text
|
||||
raise Exception("{} '{}' {} '{}'".format(cand, to_hex(cand), text, to_hex(text)))
|
||||
|
||||
|
||||
# resolving unicode complication
|
||||
|
||||
wrong_loc_count = 0
|
||||
loc_diffs = []
|
||||
|
||||
for article in data['data']:
|
||||
for para in article['paragraphs']:
|
||||
para['context'] = para['context'].replace(u'\u000A', '')
|
||||
para['context'] = para['context'].replace(u'\u00A0', ' ')
|
||||
context = para['context']
|
||||
for qa in para['qas']:
|
||||
for answer in qa['answers']:
|
||||
answer['text'] = answer['text'].replace(u'\u00A0', ' ')
|
||||
text = answer['text']
|
||||
answer_start = answer['answer_start']
|
||||
if context[answer_start:answer_start + len(text)] == text:
|
||||
if text.lstrip() == text:
|
||||
pass
|
||||
else:
|
||||
answer_start += len(text) - len(text.lstrip())
|
||||
answer['answer_start'] = answer_start
|
||||
text = text.lstrip()
|
||||
answer['text'] = text
|
||||
else:
|
||||
wrong_loc_count += 1
|
||||
text = text.lstrip()
|
||||
answer['text'] = text
|
||||
starts = list(find_all(context, text))
|
||||
if len(starts) == 1:
|
||||
answer_start = starts[0]
|
||||
elif len(starts) > 1:
|
||||
new_answer_start = min(starts, key=lambda s: abs(s - answer_start))
|
||||
loc_diffs.append(abs(new_answer_start - answer_start))
|
||||
answer_start = new_answer_start
|
||||
else:
|
||||
raise Exception()
|
||||
answer['answer_start'] = answer_start
|
||||
|
||||
answer_stop = answer_start + len(text)
|
||||
answer['answer_stop'] = answer_stop
|
||||
assert para['context'][answer_start:answer_stop] == answer['text'], "{} {}".format(
|
||||
para['context'][answer_start:answer_stop], answer['text'])
|
||||
|
||||
print(wrong_loc_count, loc_diffs)
|
||||
|
||||
mismatch_count = 0
|
||||
dep_fail_count = 0
|
||||
no_answer_count = 0
|
||||
|
||||
size = sum(len(article['paragraphs']) for article in data['data'])
|
||||
pbar = tqdm(range(size))
|
||||
|
||||
for ai, article in enumerate(data['data']):
|
||||
for pi, para in enumerate(article['paragraphs']):
|
||||
context = para['context']
|
||||
sents = h.split_doc(context)
|
||||
words = h.split_sent(context)
|
||||
sent_starts = []
|
||||
ref_idx = 0
|
||||
for sent in sents:
|
||||
new_idx = context.find(sent, ref_idx)
|
||||
sent_starts.append(new_idx)
|
||||
ref_idx = new_idx + len(sent)
|
||||
para['sents'] = sents
|
||||
para['words'] = words
|
||||
para['sent_starts'] = sent_starts
|
||||
|
||||
consts = list(map(h.get_const, sents))
|
||||
para['consts'] = consts
|
||||
deps = list(map(h.get_dep, sents))
|
||||
para['deps'] = deps
|
||||
|
||||
for qa in para['qas']:
|
||||
question = qa['question']
|
||||
question_const = h.get_const(question)
|
||||
qa['const'] = question_const
|
||||
question_dep = h.get_dep(question)
|
||||
qa['dep'] = question_dep
|
||||
qa['words'] = h.split_sent(question)
|
||||
|
||||
for answer in qa['answers']:
|
||||
answer_start = answer['answer_start']
|
||||
text = answer['text']
|
||||
answer_stop = answer_start + len(text)
|
||||
# answer_words = h.split_sent(text)
|
||||
word_idxs = []
|
||||
answer_words = []
|
||||
for sent_idx, (sent, sent_start, dep) in enumerate(zip(sents, sent_starts, deps)):
|
||||
if dep is None:
|
||||
print("dep parse failed at {} {} {}".format(ai, pi, sent_idx))
|
||||
dep_fail_count += 1
|
||||
continue
|
||||
nodes, edges = dep
|
||||
words = [node[0] for node in nodes]
|
||||
|
||||
for word_idx, (word, _, _, start, _) in enumerate(nodes):
|
||||
global_start = sent_start + start
|
||||
global_stop = global_start + len(word)
|
||||
if answer_start <= global_start < answer_stop or answer_start < global_stop <= answer_stop:
|
||||
word_idxs.append((sent_idx, word_idx))
|
||||
answer_words.append(word)
|
||||
if len(word_idxs) > 0:
|
||||
answer['answer_word_start'] = word_idxs[0]
|
||||
answer['answer_word_stop'] = word_idxs[-1][0], word_idxs[-1][1] + 1
|
||||
if not text.startswith(answer_words[0]):
|
||||
print("'{}' '{}'".format(text, ' '.join(answer_words)))
|
||||
mismatch_count += 1
|
||||
else:
|
||||
answer['answer_word_start'] = None
|
||||
answer['answer_word_stop'] = None
|
||||
no_answer_count += 1
|
||||
pbar.update(1)
|
||||
pbar.close()
|
||||
|
||||
print(mismatch_count, dep_fail_count, no_answer_count)
|
||||
|
||||
print("saving...")
|
||||
json.dump(data, open(out_path, 'w'))
|
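When the stored answer_start no longer matches the normalized context, aug_squad.py falls back to find_all and picks the candidate occurrence closest to the original offset. A self-contained sketch of that realignment step (the example strings are made up):

def find_all(a_str, sub):
    # same generator as in aug_squad.py above: yield every non-overlapping start offset
    start = 0
    while True:
        start = a_str.find(sub, start)
        if start == -1:
            return
        yield start
        start += len(sub)


context = "The cat sat. The cat slept."
text = "The cat"
answer_start = 15                                            # stale offset after whitespace normalization

starts = list(find_all(context, text))                       # [0, 13]
new_start = min(starts, key=lambda s: abs(s - answer_start)) # keep the occurrence nearest the old offset
print(new_start, context[new_start:new_start + len(text)])   # 13 'The cat'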
271 tensorflow/SQuAD/squad/eda_aug_dev.ipynb Normal file
@@ -0,0 +1,271 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"aug_data_path = \"/Users/minjoons/data/squad/dev-v1.0-aug.json\"\n",
|
||||
"aug_data = json.load(open(aug_data_path, 'r'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(['Denver', 'Broncos'], 'Denver Broncos')\n",
|
||||
"(['Denver', 'Broncos'], 'Denver Broncos')\n",
|
||||
"(['Denver', 'Broncos'], 'Denver Broncos ')\n",
|
||||
"(['Carolina', 'Panthers'], 'Carolina Panthers')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def compare_answers():\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" nodes, edges = dep\n",
|
||||
" if dep is not None:\n",
|
||||
" nodess.append(nodes)\n",
|
||||
" else:\n",
|
||||
" nodess.append([])\n",
|
||||
" wordss = [[node[0] for node in nodes] for nodes in nodess]\n",
|
||||
" for qa in para['qas']:\n",
|
||||
" for answer in qa['answers']:\n",
|
||||
" text = answer['text']\n",
|
||||
" word_start = answer['answer_word_start']\n",
|
||||
" word_stop = answer['answer_word_stop']\n",
|
||||
" answer_words = wordss[word_start[0]][word_start[1]:word_stop[1]]\n",
|
||||
" yield answer_words, text\n",
|
||||
"\n",
|
||||
"ca = compare_answers()\n",
|
||||
"print(next(ca))\n",
|
||||
"print(next(ca))\n",
|
||||
"print(next(ca))\n",
|
||||
"print(next(ca))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"8\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" if dep is None:\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"counter()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def bad_node_counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" sents = para['sents']\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" if dep is not None:\n",
|
||||
" nodes, edges = dep\n",
|
||||
" for node in nodes:\n",
|
||||
" if len(node) != 5:\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"bad_node_counter() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"7\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def noanswer_counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" if dep is not None:\n",
|
||||
" nodes, edges = dep\n",
|
||||
" nodess.append(nodes)\n",
|
||||
" else:\n",
|
||||
" nodess.append([])\n",
|
||||
" wordss = [[node[0] for node in nodes] for nodes in nodess]\n",
|
||||
" for qa in para['qas']:\n",
|
||||
" for answer in qa['answers']:\n",
|
||||
" text = answer['text']\n",
|
||||
" word_start = answer['answer_word_start']\n",
|
||||
" word_stop = answer['answer_word_stop']\n",
|
||||
" if word_start is None:\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"noanswer_counter()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"10600\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(sum(len(para['qas']) for a in aug_data['data'] for para in a['paragraphs']))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"10348\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"\n",
|
||||
"def _set_span(t, i):\n",
|
||||
" if isinstance(t[0], str):\n",
|
||||
" t.span = (i, i+len(t))\n",
|
||||
" else:\n",
|
||||
" first = True\n",
|
||||
" for c in t:\n",
|
||||
" cur_span = _set_span(c, i)\n",
|
||||
" i = cur_span[1]\n",
|
||||
" if first:\n",
|
||||
" min_ = cur_span[0]\n",
|
||||
" first = False\n",
|
||||
" max_ = cur_span[1]\n",
|
||||
" t.span = (min_, max_)\n",
|
||||
" return t.span\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def set_span(t):\n",
|
||||
" assert isinstance(t, nltk.tree.Tree)\n",
|
||||
" try:\n",
|
||||
" return _set_span(t, 0)\n",
|
||||
" except:\n",
|
||||
" print(t)\n",
|
||||
" exit()\n",
|
||||
"\n",
|
||||
"def same_span_counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" consts = para['consts']\n",
|
||||
" for const in consts:\n",
|
||||
" tree = nltk.tree.Tree.fromstring(const)\n",
|
||||
" set_span(tree)\n",
|
||||
" if len(list(tree.subtrees())) > len(set(t.span for t in tree.subtrees())):\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"same_span_counter()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
314 tensorflow/SQuAD/squad/eda_aug_train.ipynb Normal file
@@ -0,0 +1,314 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"aug_data_path = \"/Users/minjoons/data/squad/train-v1.0-aug.json\"\n",
|
||||
"aug_data = json.load(open(aug_data_path, 'r'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(['Saint', 'Bernadette', 'Soubirous'], 'Saint Bernadette Soubirous')\n",
|
||||
"(['a', 'copper', 'statue', 'of', 'Christ'], 'a copper statue of Christ')\n",
|
||||
"(['the', 'Main', 'Building'], 'the Main Building')\n",
|
||||
"(['a', 'Marian', 'place', 'of', 'prayer', 'and', 'reflection'], 'a Marian place of prayer and reflection')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def compare_answers():\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" nodes, edges = dep\n",
|
||||
" if dep is not None:\n",
|
||||
" nodess.append(nodes)\n",
|
||||
" else:\n",
|
||||
" nodess.append([])\n",
|
||||
" wordss = [[node[0] for node in nodes] for nodes in nodess]\n",
|
||||
" for qa in para['qas']:\n",
|
||||
" for answer in qa['answers']:\n",
|
||||
" text = answer['text']\n",
|
||||
" word_start = answer['answer_word_start']\n",
|
||||
" word_stop = answer['answer_word_stop']\n",
|
||||
" answer_words = wordss[word_start[0]][word_start[1]:word_stop[1]]\n",
|
||||
" yield answer_words, text\n",
|
||||
"\n",
|
||||
"ca = compare_answers()\n",
|
||||
"print(next(ca))\n",
|
||||
"print(next(ca))\n",
|
||||
"print(next(ca))\n",
|
||||
"print(next(ca))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"q: k\n",
|
||||
"q: j\n",
|
||||
"q: n\n",
|
||||
"q: b\n",
|
||||
"q: v\n",
|
||||
"x: .\n",
|
||||
"x: :208\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"q: dd\n",
|
||||
"q: dd\n",
|
||||
"q: dd\n",
|
||||
"q: dd\n",
|
||||
"q: d\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: :411\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: :40\n",
|
||||
"x: .\n",
|
||||
"x: *\n",
|
||||
"x: :14\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: :131\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"x: .\n",
|
||||
"53 10\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def nodep_counter():\n",
|
||||
" x_count = 0\n",
|
||||
" q_count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for sent, dep in zip(para['sents'], deps):\n",
|
||||
" if dep is None:\n",
|
||||
" print(\"x:\", sent)\n",
|
||||
" x_count += 1\n",
|
||||
" for qa in para['qas']:\n",
|
||||
" if qa['dep'] is None:\n",
|
||||
" print(\"q:\", qa['question'])\n",
|
||||
" q_count += 1\n",
|
||||
" print(x_count, q_count)\n",
|
||||
"nodep_counter()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def bad_node_counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" sents = para['sents']\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" if dep is not None:\n",
|
||||
" nodes, edges = dep\n",
|
||||
" for node in nodes:\n",
|
||||
" if len(node) != 5:\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"bad_node_counter() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"36\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def noanswer_counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" deps = para['deps']\n",
|
||||
" nodess = []\n",
|
||||
" for dep in deps:\n",
|
||||
" if dep is not None:\n",
|
||||
" nodes, edges = dep\n",
|
||||
" nodess.append(nodes)\n",
|
||||
" else:\n",
|
||||
" nodess.append([])\n",
|
||||
" wordss = [[node[0] for node in nodes] for nodes in nodess]\n",
|
||||
" for qa in para['qas']:\n",
|
||||
" for answer in qa['answers']:\n",
|
||||
" text = answer['text']\n",
|
||||
" word_start = answer['answer_word_start']\n",
|
||||
" word_stop = answer['answer_word_stop']\n",
|
||||
" if word_start is None:\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"noanswer_counter()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"106\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def mult_sent_answer_counter():\n",
|
||||
" count = 0\n",
|
||||
" for article in aug_data['data']:\n",
|
||||
" for para in article['paragraphs']:\n",
|
||||
" for qa in para['qas']:\n",
|
||||
" for answer in qa['answers']:\n",
|
||||
" text = answer['text']\n",
|
||||
" word_start = answer['answer_word_start']\n",
|
||||
" word_stop = answer['answer_word_stop']\n",
|
||||
" if word_start is not None and word_start[0] != word_stop[0]:\n",
|
||||
" count += 1\n",
|
||||
" print(count)\n",
|
||||
"mult_sent_answer_counter()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
94 tensorflow/SQuAD/squad/evaluate-v1.1.py Normal file
@@ -0,0 +1,94 @@
""" Official evaluation script for v1.1 of the SQuAD dataset. """
|
||||
from __future__ import print_function
|
||||
from collections import Counter
|
||||
import string
|
||||
import re
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
def normalize_answer(s):
|
||||
"""Lower text and remove punctuation, articles and extra whitespace."""
|
||||
def remove_articles(text):
|
||||
return re.sub(r'\b(a|an|the)\b', ' ', text)
|
||||
|
||||
def white_space_fix(text):
|
||||
return ' '.join(text.split())
|
||||
|
||||
def remove_punc(text):
|
||||
exclude = set(string.punctuation)
|
||||
return ''.join(ch for ch in text if ch not in exclude)
|
||||
|
||||
def lower(text):
|
||||
return text.lower()
|
||||
|
||||
return white_space_fix(remove_articles(remove_punc(lower(s))))
|
||||
|
||||
|
||||
def f1_score(prediction, ground_truth):
|
||||
prediction_tokens = normalize_answer(prediction).split()
|
||||
ground_truth_tokens = normalize_answer(ground_truth).split()
|
||||
common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
|
||||
num_same = sum(common.values())
|
||||
if num_same == 0:
|
||||
return 0
|
||||
precision = 1.0 * num_same / len(prediction_tokens)
|
||||
recall = 1.0 * num_same / len(ground_truth_tokens)
|
||||
f1 = (2 * precision * recall) / (precision + recall)
|
||||
return f1
|
||||
|
||||
|
||||
def exact_match_score(prediction, ground_truth):
|
||||
return (normalize_answer(prediction) == normalize_answer(ground_truth))
|
||||
|
||||
|
||||
def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
|
||||
scores_for_ground_truths = []
|
||||
for ground_truth in ground_truths:
|
||||
score = metric_fn(prediction, ground_truth)
|
||||
scores_for_ground_truths.append(score)
|
||||
return max(scores_for_ground_truths)
|
||||
|
||||
|
||||
def evaluate(dataset, predictions):
|
||||
f1 = exact_match = total = 0
|
||||
for article in dataset:
|
||||
for paragraph in article['paragraphs']:
|
||||
for qa in paragraph['qas']:
|
||||
total += 1
|
||||
if qa['id'] not in predictions:
|
||||
message = 'Unanswered question ' + qa['id'] + \
|
||||
' will receive score 0.'
|
||||
print(message, file=sys.stderr)
|
||||
continue
|
||||
ground_truths = list(map(lambda x: x['text'], qa['answers']))
|
||||
prediction = predictions[qa['id']]
|
||||
exact_match += metric_max_over_ground_truths(
|
||||
exact_match_score, prediction, ground_truths)
|
||||
f1 += metric_max_over_ground_truths(
|
||||
f1_score, prediction, ground_truths)
|
||||
|
||||
exact_match = 100.0 * exact_match / total
|
||||
f1 = 100.0 * f1 / total
|
||||
|
||||
return {'exact_match': exact_match, 'f1': f1}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
expected_version = '1.1'
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Evaluation for SQuAD ' + expected_version)
|
||||
parser.add_argument('dataset_file', help='Dataset file')
|
||||
parser.add_argument('prediction_file', help='Prediction File')
|
||||
args = parser.parse_args()
|
||||
with open(args.dataset_file) as dataset_file:
|
||||
dataset_json = json.load(dataset_file)
|
||||
if (dataset_json['version'] != expected_version):
|
||||
print('Evaluation expects v-' + expected_version +
|
||||
', but got dataset with v-' + dataset_json['version'],
|
||||
file=sys.stderr)
|
||||
dataset = dataset_json['data']
|
||||
with open(args.prediction_file) as prediction_file:
|
||||
predictions = json.load(prediction_file)
|
||||
print(json.dumps(evaluate(dataset, predictions)))
|
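The token-level F1 in the official script is just precision/recall over the bag of normalized tokens shared between prediction and ground truth. A quick worked example with the helper logic inlined (the strings are chosen for illustration):

from collections import Counter

# normalize_answer() lower-cases and strips punctuation and articles; done by hand for this sketch
prediction_tokens = [t for t in "the Broncos".lower().split() if t not in ('a', 'an', 'the')]
ground_truth_tokens = "Denver Broncos".lower().split()

common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
num_same = sum(common.values())                # 1 shared token: 'broncos'
precision = num_same / len(prediction_tokens)  # 1 / 1
recall = num_same / len(ground_truth_tokens)   # 1 / 2
f1 = 2 * precision * recall / (precision + recall)
print(round(f1, 3))                            # 0.667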
94 tensorflow/SQuAD/squad/evaluate.py Normal file
@@ -0,0 +1,94 @@
""" Official evaluation script for v1.1 of the SQuAD dataset. [Changed name for external importing]"""
|
||||
from __future__ import print_function
|
||||
from collections import Counter
|
||||
import string
|
||||
import re
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
def normalize_answer(s):
|
||||
"""Lower text and remove punctuation, articles and extra whitespace."""
|
||||
def remove_articles(text):
|
||||
return re.sub(r'\b(a|an|the)\b', ' ', text)
|
||||
|
||||
def white_space_fix(text):
|
||||
return ' '.join(text.split())
|
||||
|
||||
def remove_punc(text):
|
||||
exclude = set(string.punctuation)
|
||||
return ''.join(ch for ch in text if ch not in exclude)
|
||||
|
||||
def lower(text):
|
||||
return text.lower()
|
||||
|
||||
return white_space_fix(remove_articles(remove_punc(lower(s))))
|
||||
|
||||
|
||||
def f1_score(prediction, ground_truth):
|
||||
prediction_tokens = normalize_answer(prediction).split()
|
||||
ground_truth_tokens = normalize_answer(ground_truth).split()
|
||||
common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
|
||||
num_same = sum(common.values())
|
||||
if num_same == 0:
|
||||
return 0
|
||||
precision = 1.0 * num_same / len(prediction_tokens)
|
||||
recall = 1.0 * num_same / len(ground_truth_tokens)
|
||||
f1 = (2 * precision * recall) / (precision + recall)
|
||||
return f1
|
||||
|
||||
|
||||
def exact_match_score(prediction, ground_truth):
|
||||
return (normalize_answer(prediction) == normalize_answer(ground_truth))
|
||||
|
||||
|
||||
def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
|
||||
scores_for_ground_truths = []
|
||||
for ground_truth in ground_truths:
|
||||
score = metric_fn(prediction, ground_truth)
|
||||
scores_for_ground_truths.append(score)
|
||||
return max(scores_for_ground_truths)
|
||||
|
||||
|
||||
def evaluate(dataset, predictions):
|
||||
f1 = exact_match = total = 0
|
||||
for article in dataset:
|
||||
for paragraph in article['paragraphs']:
|
||||
for qa in paragraph['qas']:
|
||||
total += 1
|
||||
if qa['id'] not in predictions:
|
||||
message = 'Unanswered question ' + qa['id'] + \
|
||||
' will receive score 0.'
|
||||
print(message, file=sys.stderr)
|
||||
continue
|
||||
ground_truths = list(map(lambda x: x['text'], qa['answers']))
|
||||
prediction = predictions[qa['id']]
|
||||
exact_match += metric_max_over_ground_truths(
|
||||
exact_match_score, prediction, ground_truths)
|
||||
f1 += metric_max_over_ground_truths(
|
||||
f1_score, prediction, ground_truths)
|
||||
|
||||
exact_match = 100.0 * exact_match / total
|
||||
f1 = 100.0 * f1 / total
|
||||
|
||||
return {'exact_match': exact_match, 'f1': f1}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
expected_version = '1.1'
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Evaluation for SQuAD ' + expected_version)
|
||||
parser.add_argument('dataset_file', help='Dataset file')
|
||||
parser.add_argument('prediction_file', help='Prediction File')
|
||||
args = parser.parse_args()
|
||||
with open(args.dataset_file) as dataset_file:
|
||||
dataset_json = json.load(dataset_file)
|
||||
if (dataset_json['version'] != expected_version):
|
||||
print('Evaluation expects v-' + expected_version +
|
||||
', but got dataset with v-' + dataset_json['version'],
|
||||
file=sys.stderr)
|
||||
dataset = dataset_json['data']
|
||||
with open(args.prediction_file) as prediction_file:
|
||||
predictions = json.load(prediction_file)
|
||||
print(json.dumps(evaluate(dataset, predictions)))
|
50 tensorflow/SQuAD/squad/neg_squad.py Normal file
@@ -0,0 +1,50 @@
import argparse
import json
import os
# data: q, cq, (dq), (pq), y, *x, *cx
# shared: x, cx, (dx), (px), word_counter, char_counter, word2vec
# no metadata
import random
from collections import Counter

from tqdm import tqdm

from squad.utils import get_word_span, get_word_idx, process_tokens


def main():
    args = get_args()
    neg_squad(args)


def get_args():
    parser = argparse.ArgumentParser()
    home = os.path.expanduser("~")
    parser.add_argument("source_path")
    parser.add_argument("target_path")
    parser.add_argument('-d', "--debug", action='store_true')
    parser.add_argument('-r', "--aug_ratio", default=1, type=int)
    # TODO : put more args here
    return parser.parse_args()


def neg_squad(args):
    with open(args.source_path, 'r') as fp:
        squad = json.load(fp)
    with open(args.source_path, 'r') as fp:
        ref_squad = json.load(fp)

    for ai, article in enumerate(ref_squad['data']):
        for pi, para in enumerate(article['paragraphs']):
            cands = list(range(pi)) + list(range(pi+1, len(article['paragraphs'])))
            samples = random.sample(cands, args.aug_ratio)
            for sample in samples:
                for qi, ques in enumerate(article['paragraphs'][sample]['qas']):
                    new_ques = {'question': ques['question'], 'answers': [], 'answer_start': 0, 'id': "neg_" + ques['id']}
                    squad['data'][ai]['paragraphs'][pi]['qas'].append(new_ques)

    with open(args.target_path, 'w') as fp:
        json.dump(squad, fp)


if __name__ == "__main__":
    main()
|
241
tensorflow/SQuAD/squad/prepro.py
Normal file
241
tensorflow/SQuAD/squad/prepro.py
Normal file
|
@ -0,0 +1,241 @@
|
|||
import argparse
|
||||
import json
|
||||
import os
|
||||
# data: q, cq, (dq), (pq), y, *x, *cx
|
||||
# shared: x, cx, (dx), (px), word_counter, char_counter, word2vec
|
||||
# no metadata
|
||||
from collections import Counter
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from squad.utils import get_word_span, get_word_idx, process_tokens
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
prepro(args)
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
home = os.path.expanduser("~")
|
||||
source_dir = os.path.join(home, "data", "squad")
|
||||
target_dir = "data/squad"
|
||||
glove_dir = os.path.join(home, "data", "glove")
|
||||
parser.add_argument('-s', "--source_dir", default=source_dir)
|
||||
parser.add_argument('-t', "--target_dir", default=target_dir)
|
||||
parser.add_argument("--train_name", default='train-v1.1.json')
|
||||
parser.add_argument('-d', "--debug", action='store_true')
|
||||
parser.add_argument("--train_ratio", default=0.9, type=int)
|
||||
parser.add_argument("--glove_corpus", default="6B")
|
||||
parser.add_argument("--glove_dir", default=glove_dir)
|
||||
parser.add_argument("--glove_vec_size", default=100, type=int)
|
||||
parser.add_argument("--mode", default="full", type=str)
|
||||
parser.add_argument("--single_path", default="", type=str)
|
||||
parser.add_argument("--tokenizer", default="PTB", type=str)
|
||||
parser.add_argument("--url", default="vision-server2.corp.ai2", type=str)
|
||||
parser.add_argument("--port", default=8000, type=int)
|
||||
parser.add_argument("--split", action='store_true')
|
||||
parser.add_argument("--suffix", default="")
|
||||
# TODO : put more args here
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def create_all(args):
|
||||
out_path = os.path.join(args.source_dir, "all-v1.1.json")
|
||||
if os.path.exists(out_path):
|
||||
return
|
||||
train_path = os.path.join(args.source_dir, args.train_name)
|
||||
train_data = json.load(open(train_path, 'r'))
|
||||
dev_path = os.path.join(args.source_dir, args.dev_name)
|
||||
dev_data = json.load(open(dev_path, 'r'))
|
||||
train_data['data'].extend(dev_data['data'])
|
||||
print("dumping all data ...")
|
||||
json.dump(train_data, open(out_path, 'w'))
|
||||
|
||||
|
||||
def prepro(args):
|
||||
if not os.path.exists(args.target_dir):
|
||||
os.makedirs(args.target_dir)
|
||||
|
||||
if args.mode == 'full':
|
||||
prepro_each(args, 'train', out_name='train')
|
||||
prepro_each(args, 'dev', out_name='dev')
|
||||
prepro_each(args, 'dev', out_name='test')
|
||||
elif args.mode == 'all':
|
||||
create_all(args)
|
||||
prepro_each(args, 'dev', 0.0, 0.0, out_name='dev')
|
||||
prepro_each(args, 'dev', 0.0, 0.0, out_name='test')
|
||||
prepro_each(args, 'all', out_name='train')
|
||||
elif args.mode == 'single':
|
||||
assert len(args.single_path) > 0
|
||||
prepro_each(args, "NULL", out_name="single", in_path=args.single_path)
|
||||
else:
|
||||
prepro_each(args, 'train', 0.0, args.train_ratio, out_name='train')
|
||||
prepro_each(args, 'train', args.train_ratio, 1.0, out_name='dev')
|
||||
prepro_each(args, 'dev', out_name='test')
|
||||
|
||||
|
||||
def save(args, data, shared, data_type):
|
||||
data_path = os.path.join(args.target_dir, "data_{}.json".format(data_type))
|
||||
shared_path = os.path.join(args.target_dir, "shared_{}.json".format(data_type))
|
||||
json.dump(data, open(data_path, 'w'))
|
||||
json.dump(shared, open(shared_path, 'w'))
|
||||
|
||||
|
||||
def get_word2vec(args, word_counter):
|
||||
glove_path = os.path.join(args.glove_dir, "glove.{}.{}d.txt".format(args.glove_corpus, args.glove_vec_size))
|
||||
sizes = {'6B': int(4e5), '42B': int(1.9e6), '840B': int(2.2e6), '2B': int(1.2e6)}
|
||||
total = sizes[args.glove_corpus]
|
||||
word2vec_dict = {}
|
||||
with open(glove_path, 'r', encoding='utf-8') as fh:
|
||||
for line in tqdm(fh, total=total):
|
||||
array = line.lstrip().rstrip().split(" ")
|
||||
word = array[0]
|
||||
vector = list(map(float, array[1:]))
|
||||
if word in word_counter:
|
||||
word2vec_dict[word] = vector
|
||||
elif word.capitalize() in word_counter:
|
||||
word2vec_dict[word.capitalize()] = vector
|
||||
elif word.lower() in word_counter:
|
||||
word2vec_dict[word.lower()] = vector
|
||||
elif word.upper() in word_counter:
|
||||
word2vec_dict[word.upper()] = vector
|
||||
|
||||
print("{}/{} of word vocab have corresponding vectors in {}".format(len(word2vec_dict), len(word_counter), glove_path))
|
||||
return word2vec_dict
|
||||
|
||||
|
||||
def prepro_each(args, data_type, start_ratio=0.0, stop_ratio=1.0, out_name="default", in_path=None):
|
||||
if args.tokenizer == "PTB":
|
||||
import nltk
|
||||
sent_tokenize = nltk.sent_tokenize
|
||||
def word_tokenize(tokens):
|
||||
            return [token.replace("''", '"').replace("``", '"') for token in nltk.word_tokenize(tokens)]
    elif args.tokenizer == 'Stanford':
        from my.corenlp_interface import CoreNLPInterface
        interface = CoreNLPInterface(args.url, args.port)
        sent_tokenize = interface.split_doc
        word_tokenize = interface.split_sent
    else:
        raise Exception()

    if not args.split:
        sent_tokenize = lambda para: [para]

    source_path = in_path or os.path.join(args.source_dir, "{}-{}v1.1.json".format(data_type, args.suffix))
    source_data = json.load(open(source_path, 'r'))

    q, cq, y, rx, rcx, ids, idxs = [], [], [], [], [], [], []
    na = []
    cy = []
    x, cx = [], []
    answerss = []
    p = []
    word_counter, char_counter, lower_word_counter = Counter(), Counter(), Counter()
    start_ai = int(round(len(source_data['data']) * start_ratio))
    stop_ai = int(round(len(source_data['data']) * stop_ratio))
    for ai, article in enumerate(tqdm(source_data['data'][start_ai:stop_ai])):
        xp, cxp = [], []
        pp = []
        x.append(xp)
        cx.append(cxp)
        p.append(pp)
        for pi, para in enumerate(article['paragraphs']):
            # wordss
            context = para['context']
            context = context.replace("''", '" ')
            context = context.replace("``", '" ')
            xi = list(map(word_tokenize, sent_tokenize(context)))
            xi = [process_tokens(tokens) for tokens in xi]  # process tokens
            # given xi, add chars
            cxi = [[list(xijk) for xijk in xij] for xij in xi]
            xp.append(xi)
            cxp.append(cxi)
            pp.append(context)

            for xij in xi:
                for xijk in xij:
                    word_counter[xijk] += len(para['qas'])
                    lower_word_counter[xijk.lower()] += len(para['qas'])
                    for xijkl in xijk:
                        char_counter[xijkl] += len(para['qas'])

            rxi = [ai, pi]
            assert len(x) - 1 == ai
            assert len(x[ai]) - 1 == pi
            for qa in para['qas']:
                # get words
                qi = word_tokenize(qa['question'])
                qi = process_tokens(qi)
                cqi = [list(qij) for qij in qi]
                yi = []
                cyi = []
                answers = []
                for answer in qa['answers']:
                    answer_text = answer['text']
                    answers.append(answer_text)
                    answer_start = answer['answer_start']
                    answer_stop = answer_start + len(answer_text)
                    # TODO : put some function that gives word_start, word_stop here
                    yi0, yi1 = get_word_span(context, xi, answer_start, answer_stop)
                    # yi0 = answer['answer_word_start'] or [0, 0]
                    # yi1 = answer['answer_word_stop'] or [0, 1]
                    assert len(xi[yi0[0]]) > yi0[1]
                    assert len(xi[yi1[0]]) >= yi1[1]
                    w0 = xi[yi0[0]][yi0[1]]
                    w1 = xi[yi1[0]][yi1[1]-1]
                    i0 = get_word_idx(context, xi, yi0)
                    i1 = get_word_idx(context, xi, (yi1[0], yi1[1]-1))
                    cyi0 = answer_start - i0
                    cyi1 = answer_stop - i1 - 1
                    # print(answer_text, w0[cyi0:], w1[:cyi1+1])
                    assert answer_text[0] == w0[cyi0], (answer_text, w0, cyi0)
                    assert answer_text[-1] == w1[cyi1]
                    assert cyi0 < 32, (answer_text, w0)
                    assert cyi1 < 32, (answer_text, w1)

                    yi.append([yi0, yi1])
                    cyi.append([cyi0, cyi1])

                if len(qa['answers']) == 0:
                    yi.append([(0, 0), (0, 1)])
                    cyi.append([0, 1])
                    na.append(True)
                else:
                    na.append(False)

                for qij in qi:
                    word_counter[qij] += 1
                    lower_word_counter[qij.lower()] += 1
                    for qijk in qij:
                        char_counter[qijk] += 1

                q.append(qi)
                cq.append(cqi)
                y.append(yi)
                cy.append(cyi)
                rx.append(rxi)
                rcx.append(rxi)
                ids.append(qa['id'])
                idxs.append(len(idxs))
                answerss.append(answers)

        if args.debug:
            break

    word2vec_dict = get_word2vec(args, word_counter)
    lower_word2vec_dict = get_word2vec(args, lower_word_counter)

    # add context here
    data = {'q': q, 'cq': cq, 'y': y, '*x': rx, '*cx': rcx, 'cy': cy,
            'idxs': idxs, 'ids': ids, 'answerss': answerss, '*p': rx, 'na': na}
    shared = {'x': x, 'cx': cx, 'p': p,
              'word_counter': word_counter, 'char_counter': char_counter, 'lower_word_counter': lower_word_counter,
              'word2vec': word2vec_dict, 'lower_word2vec': lower_word2vec_dict}

    print("saving ...")
    save(args, data, shared, out_name)

if __name__ == "__main__":
    main()
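To make the answer-span bookkeeping above concrete, here is a minimal, self-contained sketch of how a character-level answer span becomes word indices (yi0, yi1) and an in-word offset (cyi0). The context and answer are made up, and it assumes the repository root is on PYTHONPATH so the helpers can be imported as squad.utils (the file shown further down in this commit).

# Illustrative only: mirrors the per-answer loop above on a toy context.
from squad.utils import get_word_span, get_word_idx

context = "The quick brown fox jumps."
xi = [["The", "quick", "brown", "fox", "jumps", "."]]  # one tokenized sentence

answer_text = "brown fox"
answer_start = context.find(answer_text)           # 10
answer_stop = answer_start + len(answer_text)       # 19

yi0, yi1 = get_word_span(context, xi, answer_start, answer_stop)
# yi0 == (0, 2) -> "brown"; yi1 == (0, 4) -> exclusive stop just after "fox"
i0 = get_word_idx(context, xi, yi0)                  # char offset where "brown" starts
cyi0 = answer_start - i0                             # 0: the answer begins at a word boundary
print(yi0, yi1, cyi0)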
183
tensorflow/SQuAD/squad/prepro_aug.py
Normal file
@@ -0,0 +1,183 @@
import argparse
|
||||
import json
|
||||
import os
|
||||
# data: q, cq, (dq), (pq), y, *x, *cx
|
||||
# shared: x, cx, (dx), (px), word_counter, char_counter, word2vec
|
||||
# no metadata
|
||||
from collections import Counter
|
||||
|
||||
import nltk
|
||||
from tqdm import tqdm
|
||||
|
||||
from my.nltk_utils import load_compressed_tree
|
||||
|
||||
|
||||
def bool_(arg):
|
||||
if arg == 'True':
|
||||
return True
|
||||
elif arg == 'False':
|
||||
return False
|
||||
raise Exception()
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
prepro(args)
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
home = os.path.expanduser("~")
|
||||
source_dir = os.path.join(home, "data", "squad")
|
||||
target_dir = "data/squad"
|
||||
glove_dir = os.path.join(home, "data", "glove")
|
||||
parser.add_argument("--source_dir", default=source_dir)
|
||||
parser.add_argument("--target_dir", default=target_dir)
|
||||
parser.add_argument("--debug", default=False, type=bool_)
|
||||
parser.add_argument("--train_ratio", default=0.9, type=int)
|
||||
parser.add_argument("--glove_corpus", default="6B")
|
||||
parser.add_argument("--glove_dir", default=glove_dir)
|
||||
parser.add_argument("--glove_vec_size", default=100, type=int)
|
||||
parser.add_argument("--full_train", default=False, type=bool_)
|
||||
# TODO : put more args here
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def prepro(args):
|
||||
if not os.path.exists(args.target_dir):
|
||||
os.makedirs(args.target_dir)
|
||||
|
||||
if args.full_train:
|
||||
data_train, shared_train = prepro_each(args, 'train')
|
||||
data_dev, shared_dev = prepro_each(args, 'dev')
|
||||
else:
|
||||
data_train, shared_train = prepro_each(args, 'train', 0.0, args.train_ratio)
|
||||
data_dev, shared_dev = prepro_each(args, 'train', args.train_ratio, 1.0)
|
||||
data_test, shared_test = prepro_each(args, 'dev')
|
||||
|
||||
print("saving ...")
|
||||
save(args, data_train, shared_train, 'train')
|
||||
save(args, data_dev, shared_dev, 'dev')
|
||||
save(args, data_test, shared_test, 'test')
|
||||
|
||||
|
||||
def save(args, data, shared, data_type):
|
||||
data_path = os.path.join(args.target_dir, "data_{}.json".format(data_type))
|
||||
shared_path = os.path.join(args.target_dir, "shared_{}.json".format(data_type))
|
||||
json.dump(data, open(data_path, 'w'))
|
||||
json.dump(shared, open(shared_path, 'w'))
|
||||
|
||||
|
||||
def get_word2vec(args, word_counter):
|
||||
glove_path = os.path.join(args.glove_dir, "glove.{}.{}d.txt".format(args.glove_corpus, args.glove_vec_size))
|
||||
sizes = {'6B': int(4e5), '42B': int(1.9e6), '840B': int(2.2e6), '2B': int(1.2e6)}
|
||||
total = sizes[args.glove_corpus]
|
||||
word2vec_dict = {}
|
||||
with open(glove_path, 'r') as fh:
|
||||
for line in tqdm(fh, total=total):
|
||||
array = line.lstrip().rstrip().split(" ")
|
||||
word = array[0]
|
||||
vector = list(map(float, array[1:]))
|
||||
if word in word_counter:
|
||||
word2vec_dict[word] = vector
|
||||
elif word.capitalize() in word_counter:
|
||||
word2vec_dict[word.capitalize()] = vector
|
||||
elif word.lower() in word_counter:
|
||||
word2vec_dict[word.lower()] = vector
|
||||
elif word.upper() in word_counter:
|
||||
word2vec_dict[word.upper()] = vector
|
||||
|
||||
print("{}/{} of word vocab have corresponding vectors in {}".format(len(word2vec_dict), len(word_counter), glove_path))
|
||||
return word2vec_dict
|
||||
|
||||
|
||||
def prepro_each(args, data_type, start_ratio=0.0, stop_ratio=1.0):
|
||||
source_path = os.path.join(args.source_dir, "{}-v1.0-aug.json".format(data_type))
|
||||
source_data = json.load(open(source_path, 'r'))
|
||||
|
||||
q, cq, y, rx, rcx, ids, idxs = [], [], [], [], [], [], []
|
||||
x, cx, tx, stx = [], [], [], []
|
||||
answerss = []
|
||||
word_counter, char_counter, lower_word_counter = Counter(), Counter(), Counter()
|
||||
pos_counter = Counter()
|
||||
start_ai = int(round(len(source_data['data']) * start_ratio))
|
||||
stop_ai = int(round(len(source_data['data']) * stop_ratio))
|
||||
for ai, article in enumerate(tqdm(source_data['data'][start_ai:stop_ai])):
|
||||
xp, cxp, txp, stxp = [], [], [], []
|
||||
x.append(xp)
|
||||
cx.append(cxp)
|
||||
tx.append(txp)
|
||||
stx.append(stxp)
|
||||
for pi, para in enumerate(article['paragraphs']):
|
||||
xi = []
|
||||
for dep in para['deps']:
|
||||
if dep is None:
|
||||
xi.append([])
|
||||
else:
|
||||
xi.append([node[0] for node in dep[0]])
|
||||
cxi = [[list(xijk) for xijk in xij] for xij in xi]
|
||||
xp.append(xi)
|
||||
cxp.append(cxi)
|
||||
txp.append(para['consts'])
|
||||
stxp.append([str(load_compressed_tree(s)) for s in para['consts']])
|
||||
trees = map(nltk.tree.Tree.fromstring, para['consts'])
|
||||
for tree in trees:
|
||||
for subtree in tree.subtrees():
|
||||
pos_counter[subtree.label()] += 1
|
||||
|
||||
for xij in xi:
|
||||
for xijk in xij:
|
||||
word_counter[xijk] += len(para['qas'])
|
||||
lower_word_counter[xijk.lower()] += len(para['qas'])
|
||||
for xijkl in xijk:
|
||||
char_counter[xijkl] += len(para['qas'])
|
||||
|
||||
rxi = [ai, pi]
|
||||
assert len(x) - 1 == ai
|
||||
assert len(x[ai]) - 1 == pi
|
||||
for qa in para['qas']:
|
||||
dep = qa['dep']
|
||||
qi = [] if dep is None else [node[0] for node in dep[0]]
|
||||
cqi = [list(qij) for qij in qi]
|
||||
yi = []
|
||||
answers = []
|
||||
for answer in qa['answers']:
|
||||
answers.append(answer['text'])
|
||||
yi0 = answer['answer_word_start'] or [0, 0]
|
||||
yi1 = answer['answer_word_stop'] or [0, 1]
|
||||
assert len(xi[yi0[0]]) > yi0[1]
|
||||
assert len(xi[yi1[0]]) >= yi1[1]
|
||||
yi.append([yi0, yi1])
|
||||
|
||||
for qij in qi:
|
||||
word_counter[qij] += 1
|
||||
lower_word_counter[qij.lower()] += 1
|
||||
for qijk in qij:
|
||||
char_counter[qijk] += 1
|
||||
|
||||
q.append(qi)
|
||||
cq.append(cqi)
|
||||
y.append(yi)
|
||||
rx.append(rxi)
|
||||
rcx.append(rxi)
|
||||
ids.append(qa['id'])
|
||||
idxs.append(len(idxs))
|
||||
answerss.append(answers)
|
||||
|
||||
if args.debug:
|
||||
break
|
||||
|
||||
word2vec_dict = get_word2vec(args, word_counter)
|
||||
lower_word2vec_dict = get_word2vec(args, lower_word_counter)
|
||||
|
||||
data = {'q': q, 'cq': cq, 'y': y, '*x': rx, '*cx': rcx, '*tx': rx, '*stx': rx,
|
||||
'idxs': idxs, 'ids': ids, 'answerss': answerss}
|
||||
shared = {'x': x, 'cx': cx, 'tx': tx, 'stx': stx,
|
||||
'word_counter': word_counter, 'char_counter': char_counter, 'lower_word_counter': lower_word_counter,
|
||||
'word2vec': word2vec_dict, 'lower_word2vec': lower_word2vec_dict, 'pos_counter': pos_counter}
|
||||
|
||||
return data, shared
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
146
tensorflow/SQuAD/squad/utils.py
Normal file
@@ -0,0 +1,146 @@
import re
import numpy as np


def get_2d_spans(text, tokenss):
    spanss = []
    cur_idx = 0
    for tokens in tokenss:
        spans = []
        for token in tokens:
            if text.find(token, cur_idx) < 0:
                print(tokens)
                print("{} {} {}".format(token, cur_idx, text))
                raise Exception()
            cur_idx = text.find(token, cur_idx)
            spans.append((cur_idx, cur_idx + len(token)))
            cur_idx += len(token)
        spanss.append(spans)
    return spanss


def get_word_span(context, wordss, start, stop):
    spanss = get_2d_spans(context, wordss)
    idxs = []
    for sent_idx, spans in enumerate(spanss):
        for word_idx, span in enumerate(spans):
            if not (stop <= span[0] or start >= span[1]):
                idxs.append((sent_idx, word_idx))

    assert len(idxs) > 0, "{} {} {} {}".format(context, spanss, start, stop)
    return idxs[0], (idxs[-1][0], idxs[-1][1] + 1)


def get_phrase(context, wordss, span):
    """
    Obtain phrase as substring of context given start and stop indices in word level
    :param context:
    :param wordss:
    :param span: ((start_sent_idx, start_word_idx), (stop_sent_idx, stop_word_idx))
    :return:
    """
    start, stop = span
    flat_start = get_flat_idx(wordss, start)
    flat_stop = get_flat_idx(wordss, stop)
    words = sum(wordss, [])
    char_idx = 0
    char_start, char_stop = None, None
    for word_idx, word in enumerate(words):
        char_idx = context.find(word, char_idx)
        assert char_idx >= 0
        if word_idx == flat_start:
            char_start = char_idx
        char_idx += len(word)
        if word_idx == flat_stop - 1:
            char_stop = char_idx
    assert char_start is not None
    assert char_stop is not None
    return context[char_start:char_stop]


def get_flat_idx(wordss, idx):
    return sum(len(words) for words in wordss[:idx[0]]) + idx[1]


def get_word_idx(context, wordss, idx):
    spanss = get_2d_spans(context, wordss)
    return spanss[idx[0]][idx[1]][0]


def process_tokens(temp_tokens):
    tokens = []
    for token in temp_tokens:
        flag = False
        l = ("-", "\u2212", "\u2014", "\u2013", "/", "~", '"', "'", "\u201C", "\u2019", "\u201D", "\u2018", "\u00B0")
        # \u2013 is en-dash. Used for number to number
        # l = ("-", "\u2212", "\u2014", "\u2013")
        # l = ("\u2013",)
        tokens.extend(re.split("([{}])".format("".join(l)), token))
    return tokens


def get_best_span(ypi, yp2i):
    max_val = 0
    best_word_span = (0, 1)
    best_sent_idx = 0
    for f, (ypif, yp2if) in enumerate(zip(ypi, yp2i)):
        argmax_j1 = 0
        for j in range(len(ypif)):
            val1 = ypif[argmax_j1]
            if val1 < ypif[j]:
                val1 = ypif[j]
                argmax_j1 = j

            val2 = yp2if[j]
            if val1 * val2 > max_val:
                best_word_span = (argmax_j1, j)
                best_sent_idx = f
                max_val = val1 * val2
    return ((best_sent_idx, best_word_span[0]), (best_sent_idx, best_word_span[1] + 1)), float(max_val)


def get_best_span_wy(wypi, th):
    chunk_spans = []
    scores = []
    chunk_start = None
    score = 0
    l = 0
    th = min(th, np.max(wypi))
    for f, wypif in enumerate(wypi):
        for j, wypifj in enumerate(wypif):
            if wypifj >= th:
                if chunk_start is None:
                    chunk_start = f, j
                score += wypifj
                l += 1
            else:
                if chunk_start is not None:
                    chunk_stop = f, j
                    chunk_spans.append((chunk_start, chunk_stop))
                    scores.append(score/l)
                    score = 0
                    l = 0
                    chunk_start = None
        if chunk_start is not None:
            chunk_stop = f, j+1
            chunk_spans.append((chunk_start, chunk_stop))
            scores.append(score/l)
            score = 0
            l = 0
            chunk_start = None

    return max(zip(chunk_spans, scores), key=lambda pair: pair[1])


def get_span_score_pairs(ypi, yp2i):
    span_score_pairs = []
    for f, (ypif, yp2if) in enumerate(zip(ypi, yp2i)):
        for j in range(len(ypif)):
            for k in range(j, len(yp2if)):
                span = ((f, j), (f, k+1))
                score = ypif[j] * yp2if[k]
                span_score_pairs.append((span, score))
    return span_score_pairs
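A short usage sketch for get_best_span above, with made-up start/stop probabilities standing in for the per-sentence distributions the models in this commit produce (run with the functions above in scope):

# Toy paragraph with two sentences of four words each. get_best_span tracks the
# best start probability seen so far and scores each candidate span by
# p(start) * p(stop), keeping the maximum.
ypi = [[0.1, 0.6, 0.2, 0.1],      # start probabilities, sentence 0
       [0.2, 0.3, 0.3, 0.2]]      # start probabilities, sentence 1
yp2i = [[0.1, 0.1, 0.7, 0.1],     # stop probabilities, sentence 0
        [0.25, 0.25, 0.25, 0.25]]

span, score = get_best_span(ypi, yp2i)
# span == ((0, 1), (0, 3)): words 1..2 of sentence 0 (the stop index is exclusive)
# score == 0.6 * 0.7 == 0.42
print(span, score)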
0
tensorflow/SQuAD/tree/__init__.py
Normal file
57
tensorflow/SQuAD/tree/cli.py
Normal file
@@ -0,0 +1,57 @@
import os
from pprint import pprint

import tensorflow as tf

from tree.main import main as m

flags = tf.app.flags

flags.DEFINE_string("model_name", "tree", "Model name [tree]")
flags.DEFINE_string("data_dir", "data/squad", "Data dir [data/squad]")
flags.DEFINE_integer("run_id", 0, "Run ID [0]")

flags.DEFINE_integer("batch_size", 128, "Batch size [128]")
flags.DEFINE_float("init_lr", 0.5, "Initial learning rate [0.5]")
flags.DEFINE_integer("num_epochs", 50, "Total number of epochs for training [50]")
flags.DEFINE_integer("num_steps", 0, "Number of steps [0]")
flags.DEFINE_integer("eval_num_batches", 100, "eval num batches [100]")
flags.DEFINE_integer("load_step", 0, "load step [0]")
flags.DEFINE_integer("early_stop", 4, "early stop [4]")

flags.DEFINE_string("mode", "test", "train | test | forward [test]")
flags.DEFINE_boolean("load", True, "load saved data? [True]")
flags.DEFINE_boolean("progress", True, "Show progress? [True]")
flags.DEFINE_integer("log_period", 100, "Log period [100]")
flags.DEFINE_integer("eval_period", 1000, "Eval period [1000]")
flags.DEFINE_integer("save_period", 1000, "Save Period [1000]")
flags.DEFINE_float("decay", 0.9, "Exponential moving average decay [0.9]")

flags.DEFINE_boolean("draft", False, "Draft for quick testing? [False]")

flags.DEFINE_integer("hidden_size", 32, "Hidden size [32]")
flags.DEFINE_float("input_keep_prob", 0.5, "Input keep prob [0.5]")
flags.DEFINE_integer("char_emb_size", 8, "Char emb size [8]")
flags.DEFINE_integer("char_filter_height", 5, "Char filter height [5]")
flags.DEFINE_float("wd", 0.0001, "Weight decay [0.0001]")
flags.DEFINE_bool("lower_word", True, "lower word [True]")
flags.DEFINE_bool("dump_eval", True, "dump eval? [True]")

flags.DEFINE_integer("word_count_th", 100, "word count th [100]")
flags.DEFINE_integer("char_count_th", 500, "char count th [500]")
flags.DEFINE_integer("sent_size_th", 64, "sent size th [64]")
flags.DEFINE_integer("num_sents_th", 8, "num sents th [8]")
flags.DEFINE_integer("ques_size_th", 64, "ques size th [64]")
flags.DEFINE_integer("word_size_th", 16, "word size th [16]")
flags.DEFINE_integer("tree_height_th", 16, "tree height th [16]")


def main(_):
    config = flags.FLAGS

    config.out_dir = os.path.join("out", config.model_name, str(config.run_id).zfill(2))

    m(config)

if __name__ == "__main__":
    tf.app.run()
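For reference, a hedged sketch of how these flag definitions turn into the config object handed to tree.main.main. It assumes TensorFlow 1.x tf.app semantics (flags are parsed from sys.argv when tf.app.run is called); the override values are illustrative only.

# Illustrative only: an equivalent command line would set the same FLAGS
# attributes that main() above reads.
import sys
import tensorflow as tf

import tree.cli as cli   # importing runs the DEFINE_* calls above, registering the flags

sys.argv = ["cli.py", "--mode", "train", "--run_id", "1", "--num_epochs", "12"]
tf.app.run(main=cli.main)  # parses the overrides into FLAGS, then calls cli.main,
                           # which builds out_dir = out/tree/01 and hands FLAGS to tree.main.main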
197
tensorflow/SQuAD/tree/evaluator.py
Normal file
@@ -0,0 +1,197 @@
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from tree.read_data import DataSet
|
||||
from my.nltk_utils import span_f1
|
||||
|
||||
|
||||
class Evaluation(object):
|
||||
def __init__(self, data_type, global_step, idxs, yp):
|
||||
self.data_type = data_type
|
||||
self.global_step = global_step
|
||||
self.idxs = idxs
|
||||
self.yp = yp
|
||||
self.num_examples = len(yp)
|
||||
self.dict = {'data_type': data_type,
|
||||
'global_step': global_step,
|
||||
'yp': yp,
|
||||
'idxs': idxs,
|
||||
'num_examples': self.num_examples}
|
||||
self.summaries = None
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}".format(self.data_type, self.global_step)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_yp = self.yp + other.yp
|
||||
new_idxs = self.idxs + other.idxs
|
||||
return Evaluation(self.data_type, self.global_step, new_idxs, new_yp)
|
||||
|
||||
def __radd__(self, other):
|
||||
return self.__add__(other)
|
||||
|
||||
|
||||
class LabeledEvaluation(Evaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, y):
|
||||
super(LabeledEvaluation, self).__init__(data_type, global_step, idxs, yp)
|
||||
self.y = y
|
||||
self.dict['y'] = y
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_yp = self.yp + other.yp
|
||||
new_y = self.y + other.y
|
||||
new_idxs = self.idxs + other.idxs
|
||||
return LabeledEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y)
|
||||
|
||||
|
||||
class AccuracyEvaluation(LabeledEvaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, y, correct, loss):
|
||||
super(AccuracyEvaluation, self).__init__(data_type, global_step, idxs, yp, y)
|
||||
self.loss = loss
|
||||
self.correct = correct
|
||||
self.acc = sum(correct) / len(correct)
|
||||
self.dict['loss'] = loss
|
||||
self.dict['correct'] = correct
|
||||
self.dict['acc'] = self.acc
|
||||
loss_summary = tf.Summary(value=[tf.Summary.Value(tag='dev/loss', simple_value=self.loss)])
|
||||
acc_summary = tf.Summary(value=[tf.Summary.Value(tag='dev/acc', simple_value=self.acc)])
|
||||
self.summaries = [loss_summary, acc_summary]
|
||||
|
||||
def __repr__(self):
|
||||
return "{} step {}: accuracy={}, loss={}".format(self.data_type, self.global_step, self.acc, self.loss)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_y = self.y + other.y
|
||||
new_correct = self.correct + other.correct
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
|
||||
return AccuracyEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y, new_correct, new_loss)
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
def __init__(self, config, model):
|
||||
self.config = config
|
||||
self.model = model
|
||||
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
|
||||
global_step, yp = sess.run([self.model.global_step, self.model.yp], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
e = Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist())
|
||||
return e
|
||||
|
||||
def get_evaluation_from_batches(self, sess, batches):
|
||||
e = sum(self.get_evaluation(sess, batch) for batch in batches)
|
||||
return e
|
||||
|
||||
|
||||
class LabeledEvaluator(Evaluator):
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
|
||||
global_step, yp = sess.run([self.model.global_step, self.model.yp], feed_dict=feed_dict)
|
||||
yp = yp[:data_set.num_examples]
|
||||
y = feed_dict[self.model.y]
|
||||
e = LabeledEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y.tolist())
|
||||
return e
|
||||
|
||||
|
||||
class AccuracyEvaluator(LabeledEvaluator):
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self.model.get_feed_dict(data_set, False)
|
||||
global_step, yp, loss = sess.run([self.model.global_step, self.model.yp, self.model.loss], feed_dict=feed_dict)
|
||||
y = feed_dict[self.model.y]
|
||||
yp = yp[:data_set.num_examples]
|
||||
correct = [self.__class__.compare(yi, ypi) for yi, ypi in zip(y, yp)]
|
||||
e = AccuracyEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y.tolist(), correct, float(loss))
|
||||
return e
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, ypi):
|
||||
return int(np.argmax(yi)) == int(np.argmax(ypi))
|
||||
|
||||
|
||||
class AccuracyEvaluator2(AccuracyEvaluator):
|
||||
@staticmethod
|
||||
def compare(yi, ypi):
|
||||
i = int(np.argmax(yi.flatten()))
|
||||
j = int(np.argmax(ypi.flatten()))
|
||||
# print(i, j, i == j)
|
||||
return i == j
|
||||
|
||||
|
||||
class TempEvaluation(AccuracyEvaluation):
|
||||
def __init__(self, data_type, global_step, idxs, yp, yp2, y, y2, correct, loss, f1s):
|
||||
super(TempEvaluation, self).__init__(data_type, global_step, idxs, yp, y, correct, loss)
|
||||
self.y2 = y2
|
||||
self.yp2 = yp2
|
||||
self.f1s = f1s
|
||||
self.f1 = float(np.mean(f1s))
|
||||
self.dict['y2'] = y2
|
||||
self.dict['yp2'] = yp2
|
||||
self.dict['f1s'] = f1s
|
||||
self.dict['f1'] = self.f1
|
||||
f1_summary = tf.Summary(value=[tf.Summary.Value(tag='dev/f1', simple_value=self.f1)])
|
||||
self.summaries.append(f1_summary)
|
||||
|
||||
def __add__(self, other):
|
||||
if other == 0:
|
||||
return self
|
||||
assert self.data_type == other.data_type
|
||||
assert self.global_step == other.global_step
|
||||
new_idxs = self.idxs + other.idxs
|
||||
new_yp = self.yp + other.yp
|
||||
new_yp2 = self.yp2 + other.yp2
|
||||
new_y = self.y + other.y
|
||||
new_y2 = self.y2 + other.y2
|
||||
new_correct = self.correct + other.correct
|
||||
new_f1s = self.f1s + other.f1s
|
||||
new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
|
||||
return TempEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_yp2, new_y, new_y2, new_correct, new_loss, new_f1s)
|
||||
|
||||
|
||||
class TempEvaluator(LabeledEvaluator):
|
||||
def get_evaluation(self, sess, batch):
|
||||
idxs, data_set = batch
|
||||
assert isinstance(data_set, DataSet)
|
||||
feed_dict = self.model.get_feed_dict(data_set, False)
|
||||
global_step, yp, yp2, loss = sess.run([self.model.global_step, self.model.yp, self.model.yp2, self.model.loss], feed_dict=feed_dict)
|
||||
y, y2 = feed_dict[self.model.y], feed_dict[self.model.y2]
|
||||
yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
|
||||
correct = [self.__class__.compare(yi, y2i, ypi, yp2i) for yi, y2i, ypi, yp2i in zip(y, y2, yp, yp2)]
|
||||
f1s = [self.__class__.span_f1(yi, y2i, ypi, yp2i) for yi, y2i, ypi, yp2i in zip(y, y2, yp, yp2)]
|
||||
e = TempEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y.tolist(), y2.tolist(), correct, float(loss), f1s)
|
||||
return e
|
||||
|
||||
@staticmethod
|
||||
def compare(yi, y2i, ypi, yp2i):
|
||||
i = int(np.argmax(yi.flatten()))
|
||||
j = int(np.argmax(ypi.flatten()))
|
||||
k = int(np.argmax(y2i.flatten()))
|
||||
l = int(np.argmax(yp2i.flatten()))
|
||||
# print(i, j, i == j)
|
||||
return i == j and k == l
|
||||
|
||||
@staticmethod
|
||||
def span_f1(yi, y2i, ypi, yp2i):
|
||||
true_span = (np.argmax(yi.flatten()), np.argmax(y2i.flatten())+1)
|
||||
pred_span = (np.argmax(ypi.flatten()), np.argmax(yp2i.flatten())+1)
|
||||
f1 = span_f1(true_span, pred_span)
|
||||
return f1
|
||||
|
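One detail worth calling out in the Evaluation classes above: __add__ returns self when the other operand is 0 and __radd__ delegates to __add__, which is what lets get_evaluation_from_batches fold per-batch results with the built-in sum(), whose accumulator starts at 0. A tiny self-contained sketch of that pattern, using an illustrative class rather than the one in this commit:

# Minimal sum()-friendly accumulator, mirroring Evaluation.__add__/__radd__.
class Accum(object):
    def __init__(self, idxs, yp):
        self.idxs, self.yp = idxs, yp

    def __add__(self, other):
        if other == 0:               # absorb sum()'s starting value
            return self
        return Accum(self.idxs + other.idxs, self.yp + other.yp)

    def __radd__(self, other):       # handles 0 + Accum on the first step
        return self.__add__(other)

batches = [Accum([0, 1], [0.2, 0.8]), Accum([2], [0.5])]
total = sum(batches)
print(total.idxs, total.yp)          # [0, 1, 2] [0.2, 0.8, 0.5]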
54
tensorflow/SQuAD/tree/graph_handler.py
Normal file
@@ -0,0 +1,54 @@
import json
from json import encoder
import os

import tensorflow as tf

from tree.evaluator import Evaluation
from my.utils import short_floats


class GraphHandler(object):
    def __init__(self, config):
        self.config = config
        self.saver = tf.train.Saver()
        self.writer = None
        self.save_path = os.path.join(config.save_dir, config.model_name)

    def initialize(self, sess):
        if self.config.load:
            self._load(sess)
        else:
            sess.run(tf.global_variables_initializer())

        if self.config.mode == 'train':
            self.writer = tf.summary.FileWriter(self.config.log_dir, graph=tf.get_default_graph())

    def save(self, sess, global_step=None):
        self.saver.save(sess, self.save_path, global_step=global_step)

    def _load(self, sess):
        config = self.config
        if config.load_step > 0:
            save_path = os.path.join(config.save_dir, "{}-{}".format(config.model_name, config.load_step))
        else:
            save_dir = config.save_dir
            checkpoint = tf.train.get_checkpoint_state(save_dir)
            assert checkpoint is not None, "cannot load checkpoint at {}".format(save_dir)
            save_path = checkpoint.model_checkpoint_path
        print("Loading saved model from {}".format(save_path))
        self.saver.restore(sess, save_path)

    def add_summary(self, summary, global_step):
        self.writer.add_summary(summary, global_step)

    def add_summaries(self, summaries, global_step):
        for summary in summaries:
            self.add_summary(summary, global_step)

    def dump_eval(self, e, precision=2):
        assert isinstance(e, Evaluation)
        path = os.path.join(self.config.eval_dir, "{}-{}.json".format(e.data_type, str(e.global_step).zfill(6)))
        with open(path, 'w') as fh:
            json.dump(short_floats(e.dict, precision), fh)
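short_floats is imported from my.utils, which is outside this section; a plausible stand-in (an assumption, not the repository's implementation) that recursively rounds floats before the JSON dump would look like this:

# Hypothetical helper with the same role as my.utils.short_floats: truncate
# floats to `precision` digits so the dumped eval JSON stays small.
def short_floats(obj, precision):
    if isinstance(obj, float):
        return round(obj, precision)
    if isinstance(obj, dict):
        return {k: short_floats(v, precision) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [short_floats(v, precision) for v in obj]
    return obj

print(short_floats({'acc': 0.73456, 'yp': [0.123456, 0.9]}, 2))
# {'acc': 0.73, 'yp': [0.12, 0.9]}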
187
tensorflow/SQuAD/tree/main.py
Normal file
@@ -0,0 +1,187 @@
import argparse
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
from pprint import pprint
|
||||
|
||||
import tensorflow as tf
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
|
||||
from tree.evaluator import AccuracyEvaluator2, Evaluator
|
||||
from tree.graph_handler import GraphHandler
|
||||
from tree.model import Model
|
||||
from tree.trainer import Trainer
|
||||
|
||||
from tree.read_data import load_metadata, read_data, get_squad_data_filter, update_config
|
||||
|
||||
|
||||
def main(config):
|
||||
set_dirs(config)
|
||||
if config.mode == 'train':
|
||||
_train(config)
|
||||
elif config.mode == 'test':
|
||||
_test(config)
|
||||
elif config.mode == 'forward':
|
||||
_forward(config)
|
||||
else:
|
||||
raise ValueError("invalid value for 'mode': {}".format(config.mode))
|
||||
|
||||
|
||||
def _config_draft(config):
|
||||
if config.draft:
|
||||
config.num_steps = 10
|
||||
config.eval_period = 10
|
||||
config.log_period = 1
|
||||
config.save_period = 10
|
||||
config.eval_num_batches = 1
|
||||
|
||||
|
||||
def _train(config):
|
||||
# load_metadata(config, 'train') # this updates the config file according to metadata file
|
||||
|
||||
data_filter = get_squad_data_filter(config)
|
||||
train_data = read_data(config, 'train', config.load, data_filter=data_filter)
|
||||
dev_data = read_data(config, 'dev', True, data_filter=data_filter)
|
||||
update_config(config, [train_data, dev_data])
|
||||
|
||||
_config_draft(config)
|
||||
|
||||
word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
|
||||
word2idx_dict = train_data.shared['word2idx']
|
||||
idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
|
||||
print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
|
||||
emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
|
||||
else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
|
||||
for idx in range(config.word_vocab_size)])
|
||||
config.emb_mat = emb_mat
|
||||
|
||||
# construct model graph and variables (using default graph)
|
||||
pprint(config.__flags, indent=2)
|
||||
model = Model(config)
|
||||
trainer = Trainer(config, model)
|
||||
evaluator = AccuracyEvaluator2(config, model)
|
||||
graph_handler = GraphHandler(config) # controls all tensors and variables in the graph, including loading /saving
|
||||
|
||||
# Variables
|
||||
sess = tf.Session()
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
# begin training
|
||||
num_steps = config.num_steps or int(config.num_epochs * train_data.num_examples / config.batch_size)
|
||||
max_acc = 0
|
||||
noupdate_count = 0
|
||||
global_step = 0
|
||||
for _, batch in tqdm(train_data.get_batches(config.batch_size, num_batches=num_steps, shuffle=True), total=num_steps):
|
||||
global_step = sess.run(model.global_step) + 1 # +1 because all calculations are done after step
|
||||
get_summary = global_step % config.log_period == 0
|
||||
loss, summary, train_op = trainer.step(sess, batch, get_summary=get_summary)
|
||||
if get_summary:
|
||||
graph_handler.add_summary(summary, global_step)
|
||||
|
||||
# Occasional evaluation and saving
|
||||
if global_step % config.save_period == 0:
|
||||
graph_handler.save(sess, global_step=global_step)
|
||||
if global_step % config.eval_period == 0:
|
||||
num_batches = math.ceil(dev_data.num_examples / config.batch_size)
|
||||
if 0 < config.eval_num_batches < num_batches:
|
||||
num_batches = config.eval_num_batches
|
||||
e = evaluator.get_evaluation_from_batches(
|
||||
sess, tqdm(dev_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
|
||||
graph_handler.add_summaries(e.summaries, global_step)
|
||||
if e.acc > max_acc:
|
||||
max_acc = e.acc
|
||||
noupdate_count = 0
|
||||
else:
|
||||
noupdate_count += 1
|
||||
if noupdate_count == config.early_stop:
|
||||
break
|
||||
if config.dump_eval:
|
||||
graph_handler.dump_eval(e)
|
||||
if global_step % config.save_period != 0:
|
||||
graph_handler.save(sess, global_step=global_step)
|
||||
|
||||
|
||||
def _test(config):
|
||||
test_data = read_data(config, 'test', True)
|
||||
update_config(config, [test_data])
|
||||
|
||||
_config_draft(config)
|
||||
|
||||
pprint(config.__flags, indent=2)
|
||||
model = Model(config)
|
||||
evaluator = AccuracyEvaluator2(config, model)
|
||||
graph_handler = GraphHandler(config) # controls all tensors and variables in the graph, including loading /saving
|
||||
|
||||
sess = tf.Session()
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
num_batches = math.ceil(test_data.num_examples / config.batch_size)
|
||||
if 0 < config.eval_num_batches < num_batches:
|
||||
num_batches = config.eval_num_batches
|
||||
e = evaluator.get_evaluation_from_batches(sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
|
||||
print(e)
|
||||
if config.dump_eval:
|
||||
graph_handler.dump_eval(e)
|
||||
|
||||
|
||||
def _forward(config):
|
||||
|
||||
forward_data = read_data(config, 'forward', True)
|
||||
|
||||
_config_draft(config)
|
||||
|
||||
pprint(config.__flag, indent=2)
|
||||
model = Model(config)
|
||||
evaluator = Evaluator(config, model)
|
||||
graph_handler = GraphHandler(config) # controls all tensors and variables in the graph, including loading /saving
|
||||
|
||||
sess = tf.Session()
|
||||
graph_handler.initialize(sess)
|
||||
|
||||
num_batches = math.ceil(forward_data.num_examples / config.batch_size)
|
||||
if 0 < config.eval_num_batches < num_batches:
|
||||
num_batches = config.eval_num_batches
|
||||
e = evaluator.get_evaluation_from_batches(sess, tqdm(forward_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
|
||||
print(e)
|
||||
if config.dump_eval:
|
||||
graph_handler.dump_eval(e)
|
||||
|
||||
|
||||
def set_dirs(config):
|
||||
# create directories
|
||||
if not config.load and os.path.exists(config.out_dir):
|
||||
shutil.rmtree(config.out_dir)
|
||||
|
||||
config.save_dir = os.path.join(config.out_dir, "save")
|
||||
config.log_dir = os.path.join(config.out_dir, "log")
|
||||
config.eval_dir = os.path.join(config.out_dir, "eval")
|
||||
if not os.path.exists(config.out_dir):
|
||||
os.makedirs(config.out_dir)
|
||||
if not os.path.exists(config.save_dir):
|
||||
os.mkdir(config.save_dir)
|
||||
if not os.path.exists(config.log_dir):
|
||||
os.mkdir(config.eval_dir)
|
||||
|
||||
|
||||
def _get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("config_path")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
class Config(object):
|
||||
def __init__(self, **entries):
|
||||
self.__dict__.update(entries)
|
||||
|
||||
|
||||
def _run():
|
||||
args = _get_args()
|
||||
with open(args.config_path, 'r') as fh:
|
||||
config = Config(**json.load(fh))
|
||||
main(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_run()
|
248
tensorflow/SQuAD/tree/model.py
Normal file
@@ -0,0 +1,248 @@
import nltk
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.ops.rnn_cell import BasicLSTMCell
|
||||
|
||||
from my.nltk_utils import tree2matrix, find_max_f1_subtree, load_compressed_tree, set_span
|
||||
from tree.read_data import DataSet
|
||||
from my.tensorflow import exp_mask, get_initializer
|
||||
from my.tensorflow.nn import linear
|
||||
from my.tensorflow.rnn import bidirectional_dynamic_rnn, dynamic_rnn
|
||||
from my.tensorflow.rnn_cell import SwitchableDropoutWrapper, NoOpCell, TreeRNNCell
|
||||
|
||||
|
||||
class Model(object):
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.global_step = tf.get_variable('global_step', shape=[], dtype='int32',
|
||||
initializer=tf.constant_initializer(0), trainable=False)
|
||||
|
||||
# Define forward inputs here
|
||||
N, M, JX, JQ, VW, VC, W, H = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.max_word_size, config.max_tree_height
|
||||
self.x = tf.placeholder('int32', [None, M, JX], name='x')
|
||||
self.cx = tf.placeholder('int32', [None, M, JX, W], name='cx')
|
||||
self.q = tf.placeholder('int32', [None, JQ], name='q')
|
||||
self.cq = tf.placeholder('int32', [None, JQ, W], name='cq')
|
||||
self.tx = tf.placeholder('int32', [None, M, H, JX], name='tx')
|
||||
self.tx_edge_mask = tf.placeholder('bool', [None, M, H, JX, JX], name='tx_edge_mask')
|
||||
self.y = tf.placeholder('bool', [None, M, H, JX], name='y')
|
||||
self.is_train = tf.placeholder('bool', [], name='is_train')
|
||||
|
||||
# Define misc
|
||||
|
||||
# Forward outputs / loss inputs
|
||||
self.logits = None
|
||||
self.yp = None
|
||||
self.var_list = None
|
||||
|
||||
# Loss outputs
|
||||
self.loss = None
|
||||
|
||||
self._build_forward()
|
||||
self._build_loss()
|
||||
|
||||
self.ema_op = self._get_ema_op()
|
||||
self.summary = tf.summary.merge_all()
|
||||
|
||||
def _build_forward(self):
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC, d, dc, W = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
|
||||
config.char_emb_size, config.max_word_size
|
||||
H = config.max_tree_height
|
||||
|
||||
x_mask = self.x > 0
|
||||
q_mask = self.q > 0
|
||||
tx_mask = self.tx > 0 # [N, M, H, JX]
|
||||
|
||||
with tf.variable_scope("char_emb"):
|
||||
char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')
|
||||
Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx) # [N, M, JX, W, dc]
|
||||
Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq) # [N, JQ, W, dc]
|
||||
|
||||
filter = tf.get_variable("filter", shape=[1, config.char_filter_height, dc, d], dtype='float')
|
||||
bias = tf.get_variable("bias", shape=[d], dtype='float')
|
||||
strides = [1, 1, 1, 1]
|
||||
Acx = tf.reshape(Acx, [-1, JX, W, dc])
|
||||
Acq = tf.reshape(Acq, [-1, JQ, W, dc])
|
||||
xxc = tf.nn.conv2d(Acx, filter, strides, "VALID") + bias # [N*M, JX, W/filter_stride, d]
|
||||
qqc = tf.nn.conv2d(Acq, filter, strides, "VALID") + bias # [N, JQ, W/filter_stride, d]
|
||||
xxc = tf.reshape(tf.reduce_max(tf.nn.relu(xxc), 2), [-1, M, JX, d])
|
||||
qqc = tf.reshape(tf.reduce_max(tf.nn.relu(qqc), 2), [-1, JQ, d])
|
||||
|
||||
with tf.variable_scope("word_emb"):
|
||||
if config.mode == 'train':
|
||||
word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, config.word_emb_size], initializer=get_initializer(config.emb_mat))
|
||||
else:
|
||||
word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, config.word_emb_size], dtype='float')
|
||||
Ax = tf.nn.embedding_lookup(word_emb_mat, self.x) # [N, M, JX, d]
|
||||
Aq = tf.nn.embedding_lookup(word_emb_mat, self.q) # [N, JQ, d]
|
||||
# Ax = linear([Ax], d, False, scope='Ax_reshape')
|
||||
# Aq = linear([Aq], d, False, scope='Aq_reshape')
|
||||
|
||||
xx = tf.concat(axis=3, values=[xxc, Ax]) # [N, M, JX, 2d]
|
||||
qq = tf.concat(axis=2, values=[qqc, Aq]) # [N, JQ, 2d]
|
||||
D = d + config.word_emb_size
|
||||
|
||||
with tf.variable_scope("pos_emb"):
|
||||
pos_emb_mat = tf.get_variable("pos_emb_mat", shape=[config.pos_vocab_size, d], dtype='float')
|
||||
Atx = tf.nn.embedding_lookup(pos_emb_mat, self.tx) # [N, M, H, JX, d]
|
||||
|
||||
cell = BasicLSTMCell(D, state_is_tuple=True)
|
||||
cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
|
||||
x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2) # [N, M]
|
||||
q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1) # [N]
|
||||
|
||||
with tf.variable_scope("rnn"):
|
||||
(fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='start') # [N, M, JX, 2d]
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
(fw_us, bw_us), (_, (fw_u, bw_u)) = bidirectional_dynamic_rnn(cell, cell, qq, q_len, dtype='float', scope='start') # [N, J, d], [N, d]
|
||||
u = (fw_u + bw_u) / 2.0
|
||||
h = (fw_h + bw_h) / 2.0
|
||||
|
||||
with tf.variable_scope("h"):
|
||||
no_op_cell = NoOpCell(D)
|
||||
tree_rnn_cell = TreeRNNCell(no_op_cell, d, tf.reduce_max)
|
||||
initial_state = tf.reshape(h, [N*M*JX, D]) # [N*M*JX, D]
|
||||
inputs = tf.concat(axis=4, values=[Atx, tf.cast(self.tx_edge_mask, 'float')]) # [N, M, H, JX, d+JX]
|
||||
inputs = tf.reshape(tf.transpose(inputs, [0, 1, 3, 2, 4]), [N*M*JX, H, d + JX]) # [N*M*JX, H, d+JX]
|
||||
length = tf.reshape(tf.reduce_sum(tf.cast(tx_mask, 'int32'), 2), [N*M*JX])
|
||||
# length = tf.reshape(tf.reduce_sum(tf.cast(tf.transpose(tx_mask, [0, 1, 3, 2]), 'float'), 3), [-1])
|
||||
h, _ = dynamic_rnn(tree_rnn_cell, inputs, length, initial_state=initial_state) # [N*M*JX, H, D]
|
||||
h = tf.transpose(tf.reshape(h, [N, M, JX, H, D]), [0, 1, 3, 2, 4]) # [N, M, H, JX, D]
|
||||
|
||||
u = tf.expand_dims(tf.expand_dims(tf.expand_dims(u, 1), 1), 1) # [N, 1, 1, 1, 4d]
|
||||
dot = linear(h * u, 1, True, squeeze=True, scope='dot') # [N, M, H, JX]
|
||||
# self.logits = tf.reshape(dot, [N, M * H * JX])
|
||||
self.logits = tf.reshape(exp_mask(dot, tx_mask), [N, M * H * JX]) # [N, M, H, JX]
|
||||
self.yp = tf.reshape(tf.nn.softmax(self.logits), [N, M, H, JX])
|
||||
|
||||
def _build_loss(self):
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size
|
||||
H = config.max_tree_height
|
||||
ce_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
|
||||
logits=self.logits, labels=tf.cast(tf.reshape(self.y, [N, M * H * JX]), 'float')))
|
||||
tf.add_to_collection('losses', ce_loss)
|
||||
self.loss = tf.add_n(tf.get_collection('losses'), name='loss')
|
||||
tf.summary.scalar(self.loss.op.name, self.loss)
|
||||
tf.add_to_collection('ema/scalar', self.loss)
|
||||
|
||||
def _get_ema_op(self):
|
||||
ema = tf.train.ExponentialMovingAverage(self.config.decay)
|
||||
ema_op = ema.apply(tf.get_collection("ema/scalar") + tf.get_collection("ema/histogram"))
|
||||
for var in tf.get_collection("ema/scalar"):
|
||||
ema_var = ema.average(var)
|
||||
tf.summary.scalar(ema_var.op.name, ema_var)
|
||||
for var in tf.get_collection("ema/histogram"):
|
||||
ema_var = ema.average(var)
|
||||
tf.summary.histogram(ema_var.op.name, ema_var)
|
||||
return ema_op
|
||||
|
||||
def get_loss(self):
|
||||
return self.loss
|
||||
|
||||
def get_global_step(self):
|
||||
return self.global_step
|
||||
|
||||
def get_var_list(self):
|
||||
return self.var_list
|
||||
|
||||
def get_feed_dict(self, batch, is_train, supervised=True):
|
||||
assert isinstance(batch, DataSet)
|
||||
config = self.config
|
||||
N, M, JX, JQ, VW, VC, d, W, H = \
|
||||
config.batch_size, config.max_num_sents, config.max_sent_size, \
|
||||
config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, config.max_word_size, \
|
||||
config.max_tree_height
|
||||
feed_dict = {}
|
||||
|
||||
x = np.zeros([N, M, JX], dtype='int32')
|
||||
cx = np.zeros([N, M, JX, W], dtype='int32')
|
||||
q = np.zeros([N, JQ], dtype='int32')
|
||||
cq = np.zeros([N, JQ, W], dtype='int32')
|
||||
tx = np.zeros([N, M, H, JX], dtype='int32')
|
||||
tx_edge_mask = np.zeros([N, M, H, JX, JX], dtype='bool')
|
||||
|
||||
feed_dict[self.x] = x
|
||||
feed_dict[self.cx] = cx
|
||||
feed_dict[self.q] = q
|
||||
feed_dict[self.cq] = cq
|
||||
feed_dict[self.tx] = tx
|
||||
feed_dict[self.tx_edge_mask] = tx_edge_mask
|
||||
feed_dict[self.is_train] = is_train
|
||||
|
||||
def _get_word(word):
|
||||
d = batch.shared['word2idx']
|
||||
for each in (word, word.lower(), word.capitalize(), word.upper()):
|
||||
if each in d:
|
||||
return d[each]
|
||||
return 1
|
||||
|
||||
def _get_char(char):
|
||||
d = batch.shared['char2idx']
|
||||
if char in d:
|
||||
return d[char]
|
||||
return 1
|
||||
|
||||
def _get_pos(tree):
|
||||
d = batch.shared['pos2idx']
|
||||
if tree.label() in d:
|
||||
return d[tree.label()]
|
||||
return 1
|
||||
|
||||
for i, xi in enumerate(batch.data['x']):
|
||||
for j, xij in enumerate(xi):
|
||||
for k, xijk in enumerate(xij):
|
||||
x[i, j, k] = _get_word(xijk)
|
||||
|
||||
for i, cxi in enumerate(batch.data['cx']):
|
||||
for j, cxij in enumerate(cxi):
|
||||
for k, cxijk in enumerate(cxij):
|
||||
for l, cxijkl in enumerate(cxijk):
|
||||
cx[i, j, k, l] = _get_char(cxijkl)
|
||||
if l + 1 == config.max_word_size:
|
||||
break
|
||||
|
||||
for i, qi in enumerate(batch.data['q']):
|
||||
for j, qij in enumerate(qi):
|
||||
q[i, j] = _get_word(qij)
|
||||
|
||||
for i, cqi in enumerate(batch.data['cq']):
|
||||
for j, cqij in enumerate(cqi):
|
||||
for k, cqijk in enumerate(cqij):
|
||||
cq[i, j, k] = _get_char(cqijk)
|
||||
if k + 1 == config.max_word_size:
|
||||
break
|
||||
|
||||
for i, txi in enumerate(batch.data['stx']):
|
||||
for j, txij in enumerate(txi):
|
||||
txij_mat, txij_mask = tree2matrix(nltk.tree.Tree.fromstring(txij), _get_pos, row_size=H, col_size=JX)
|
||||
tx[i, j, :, :], tx_edge_mask[i, j, :, :, :] = txij_mat, txij_mask
|
||||
|
||||
if supervised:
|
||||
y = np.zeros([N, M, H, JX], dtype='bool')
|
||||
feed_dict[self.y] = y
|
||||
for i, yi in enumerate(batch.data['y']):
|
||||
start_idx, stop_idx = yi
|
||||
sent_idx = start_idx[0]
|
||||
if start_idx[0] == stop_idx[0]:
|
||||
span = [start_idx[1], stop_idx[1]]
|
||||
else:
|
||||
span = [start_idx[1], len(batch.data['x'][sent_idx])]
|
||||
tree = nltk.tree.Tree.fromstring(batch.data['stx'][i][sent_idx])
|
||||
set_span(tree)
|
||||
best_subtree = find_max_f1_subtree(tree, span)
|
||||
|
||||
def _get_y(t):
|
||||
return t == best_subtree
|
||||
|
||||
yij, _ = tree2matrix(tree, _get_y, H, JX, dtype='bool')
|
||||
y[i, sent_idx, :, :] = yij
|
||||
|
||||
return feed_dict
|
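exp_mask is imported from my.tensorflow and its source is not shown here; the definition assumed below is the common one, where masked-out positions receive a very large negative offset so the subsequent softmax gives them effectively zero probability:

# Assumed definition of the exp_mask helper used in _build_forward above.
import tensorflow as tf

VERY_BIG_NEGATIVE_NUMBER = -1e30

def exp_mask(val, mask):
    # positions where mask is False get a huge negative offset
    return val + (1.0 - tf.cast(mask, 'float32')) * VERY_BIG_NEGATIVE_NUMBER

logits = tf.constant([[1.0, 2.0, 3.0]])
mask = tf.constant([[True, True, False]])
probs = tf.nn.softmax(exp_mask(logits, mask))  # third position gets ~0 probability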
159
tensorflow/SQuAD/tree/read_data.py
Normal file
@@ -0,0 +1,159 @@
import json
|
||||
import os
|
||||
import random
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import nltk
|
||||
|
||||
from my.nltk_utils import load_compressed_tree
|
||||
from my.utils import index
|
||||
|
||||
|
||||
class DataSet(object):
|
||||
def __init__(self, data, data_type, shared=None, valid_idxs=None):
|
||||
total_num_examples = len(next(iter(data.values())))
|
||||
self.data = data # e.g. {'X': [0, 1, 2], 'Y': [2, 3, 4]}
|
||||
self.data_type = data_type
|
||||
self.shared = shared
|
||||
self.valid_idxs = range(total_num_examples) if valid_idxs is None else valid_idxs
|
||||
self.num_examples = len(self.valid_idxs)
|
||||
|
||||
def get_batches(self, batch_size, num_batches=None, shuffle=False):
|
||||
num_batches_per_epoch = int(math.ceil(self.num_examples / batch_size))
|
||||
if num_batches is None:
|
||||
num_batches = num_batches_per_epoch
|
||||
num_epochs = int(math.ceil(num_batches / num_batches_per_epoch))
|
||||
|
||||
idxs = itertools.chain.from_iterable(random.sample(self.valid_idxs, len(self.valid_idxs))
|
||||
if shuffle else self.valid_idxs
|
||||
for _ in range(num_epochs))
|
||||
for _ in range(num_batches):
|
||||
batch_idxs = tuple(itertools.islice(idxs, batch_size))
|
||||
batch_data = {}
|
||||
for key, val in self.data.items():
|
||||
if key.startswith('*'):
|
||||
assert self.shared is not None
|
||||
shared_key = key[1:]
|
||||
batch_data[shared_key] = [index(self.shared[shared_key], val[idx]) for idx in batch_idxs]
|
||||
else:
|
||||
batch_data[key] = list(map(val.__getitem__, batch_idxs))
|
||||
|
||||
batch_ds = DataSet(batch_data, self.data_type, shared=self.shared)
|
||||
yield batch_idxs, batch_ds
|
||||
|
||||
|
||||
class SquadDataSet(DataSet):
|
||||
def __init__(self, data, data_type, shared=None, valid_idxs=None):
|
||||
super(SquadDataSet, self).__init__(data, data_type, shared=shared, valid_idxs=valid_idxs)
|
||||
|
||||
|
||||
def load_metadata(config, data_type):
|
||||
metadata_path = os.path.join(config.data_dir, "metadata_{}.json".format(data_type))
|
||||
with open(metadata_path, 'r') as fh:
|
||||
metadata = json.load(fh)
|
||||
for key, val in metadata.items():
|
||||
config.__setattr__(key, val)
|
||||
return metadata
|
||||
|
||||
|
||||
def read_data(config, data_type, ref, data_filter=None):
|
||||
data_path = os.path.join(config.data_dir, "data_{}.json".format(data_type))
|
||||
shared_path = os.path.join(config.data_dir, "shared_{}.json".format(data_type))
|
||||
with open(data_path, 'r') as fh:
|
||||
data = json.load(fh)
|
||||
with open(shared_path, 'r') as fh:
|
||||
shared = json.load(fh)
|
||||
|
||||
num_examples = len(next(iter(data.values())))
|
||||
if data_filter is None:
|
||||
valid_idxs = range(num_examples)
|
||||
else:
|
||||
mask = []
|
||||
keys = data.keys()
|
||||
values = data.values()
|
||||
for vals in zip(*values):
|
||||
each = {key: val for key, val in zip(keys, vals)}
|
||||
mask.append(data_filter(each, shared))
|
||||
valid_idxs = [idx for idx in range(len(mask)) if mask[idx]]
|
||||
|
||||
print("Loaded {}/{} examples from {}".format(len(valid_idxs), num_examples, data_type))
|
||||
|
||||
shared_path = os.path.join(config.out_dir, "shared.json")
|
||||
if not ref:
|
||||
word_counter = shared['lower_word_counter'] if config.lower_word else shared['word_counter']
|
||||
char_counter = shared['char_counter']
|
||||
pos_counter = shared['pos_counter']
|
||||
shared['word2idx'] = {word: idx + 2 for idx, word in
|
||||
enumerate(word for word, count in word_counter.items()
|
||||
if count > config.word_count_th)}
|
||||
shared['char2idx'] = {char: idx + 2 for idx, char in
|
||||
enumerate(char for char, count in char_counter.items()
|
||||
if count > config.char_count_th)}
|
||||
shared['pos2idx'] = {pos: idx + 2 for idx, pos in enumerate(pos_counter.keys())}
|
||||
NULL = "-NULL-"
|
||||
UNK = "-UNK-"
|
||||
shared['word2idx'][NULL] = 0
|
||||
shared['word2idx'][UNK] = 1
|
||||
shared['char2idx'][NULL] = 0
|
||||
shared['char2idx'][UNK] = 1
|
||||
shared['pos2idx'][NULL] = 0
|
||||
shared['pos2idx'][UNK] = 1
|
||||
json.dump({'word2idx': shared['word2idx'], 'char2idx': shared['char2idx'],
|
||||
'pos2idx': shared['pos2idx']}, open(shared_path, 'w'))
|
||||
else:
|
||||
new_shared = json.load(open(shared_path, 'r'))
|
||||
for key, val in new_shared.items():
|
||||
shared[key] = val
|
||||
|
||||
data_set = DataSet(data, data_type, shared=shared, valid_idxs=valid_idxs)
|
||||
return data_set
|
||||
|
||||
|
||||
def get_squad_data_filter(config):
|
||||
def data_filter(data_point, shared):
|
||||
assert shared is not None
|
||||
rx, rcx, q, cq, y = (data_point[key] for key in ('*x', '*cx', 'q', 'cq', 'y'))
|
||||
x, cx, stx = shared['x'], shared['cx'], shared['stx']
|
||||
if len(q) > config.ques_size_th:
|
||||
return False
|
||||
xi = x[rx[0]][rx[1]]
|
||||
if len(xi) > config.num_sents_th:
|
||||
return False
|
||||
if any(len(xij) > config.sent_size_th for xij in xi):
|
||||
return False
|
||||
stxi = stx[rx[0]][rx[1]]
|
||||
if any(nltk.tree.Tree.fromstring(s).height() > config.tree_height_th for s in stxi):
|
||||
return False
|
||||
return True
|
||||
return data_filter
|
||||
|
||||
|
||||
def update_config(config, data_sets):
|
||||
config.max_num_sents = 0
|
||||
config.max_sent_size = 0
|
||||
config.max_ques_size = 0
|
||||
config.max_word_size = 0
|
||||
config.max_tree_height = 0
|
||||
for data_set in data_sets:
|
||||
data = data_set.data
|
||||
shared = data_set.shared
|
||||
for idx in data_set.valid_idxs:
|
||||
rx = data['*x'][idx]
|
||||
q = data['q'][idx]
|
||||
sents = shared['x'][rx[0]][rx[1]]
|
||||
trees = map(nltk.tree.Tree.fromstring, shared['stx'][rx[0]][rx[1]])
|
||||
config.max_tree_height = max(config.max_tree_height, max(tree.height() for tree in trees))
|
||||
config.max_num_sents = max(config.max_num_sents, len(sents))
|
||||
config.max_sent_size = max(config.max_sent_size, max(map(len, sents)))
|
||||
config.max_word_size = max(config.max_word_size, max(len(word) for sent in sents for word in sent))
|
||||
if len(q) > 0:
|
||||
config.max_ques_size = max(config.max_ques_size, len(q))
|
||||
config.max_word_size = max(config.max_word_size, max(len(word) for word in q))
|
||||
|
||||
config.max_word_size = min(config.max_word_size, config.word_size_th)
|
||||
|
||||
config.char_vocab_size = len(data_sets[0].shared['char2idx'])
|
||||
config.word_emb_size = len(next(iter(data_sets[0].shared['word2vec'].values())))
|
||||
config.word_vocab_size = len(data_sets[0].shared['word2idx'])
|
||||
config.pos_vocab_size = len(data_sets[0].shared['pos2idx'])
|
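A toy usage sketch for DataSet.get_batches above. It assumes the repository root is importable and that my.utils.index performs nested list indexing (index(l, (0, 0)) == l[0][0]), which is how the '*'-prefixed keys are dereferenced against shared:

# Illustrative batching example: '*'-prefixed keys hold index paths into
# `shared`, everything else is stored per example.
from tree.read_data import DataSet

shared = {'x': [[['tokenized', 'paragraph']], [['another', 'one']]]}
data = {'q': [['what', '?'], ['who', '?'], ['when', '?']],
        '*x': [[0, 0], [0, 0], [1, 0]]}   # (article_idx, paragraph_idx) per question

ds = DataSet(data, 'train', shared=shared)
for batch_idxs, batch in ds.get_batches(batch_size=2, num_batches=2, shuffle=True):
    # '*x' has been dereferenced to the actual paragraph under the key 'x'
    print(batch_idxs, batch.data['q'], batch.data['x'])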
67
tensorflow/SQuAD/tree/templates/visualizer.html
Normal file
@@ -0,0 +1,67 @@
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{{ title }}</title>
|
||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/chroma-js/1.1.1/chroma.min.js"></script>
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$(".att").each(function() {
|
||||
// var val = parseFloat($(this).text());
|
||||
var val = parseFloat($(this).attr("color"));
|
||||
var scale = chroma.scale(['white', 'red']);
|
||||
var color = scale(val).hex();
|
||||
$(this).attr("bgcolor", color);
|
||||
});
|
||||
})
|
||||
</script>
|
||||
</head>
|
||||
<style>
|
||||
table, th, td {border: 1px solid black}
|
||||
</style>
|
||||
<body>
|
||||
<h2>{{ title }}</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Question</th>
|
||||
<th>Answer</th>
|
||||
<th>Paragraph</th>
|
||||
</tr>
|
||||
{% for row in rows %}
|
||||
<tr>
|
||||
<td>{{ row.id }}</td>
|
||||
<td>
|
||||
{% for qj in row.ques %}
|
||||
{{ qj }}
|
||||
{% endfor %}
|
||||
</td>
|
||||
<td>{{ row.a }}</td>
|
||||
<td>
|
||||
<table>
|
||||
{% for xj, yj, y2j, ypj, yp2j in zip(row.para, row.y, row.y2, row.yp, row.yp2) %}
|
||||
<tr>
|
||||
{% for xjk, yjk, y2jk, ypjk in zip(xj, yj, y2j, ypj) %}
|
||||
<td class="att" color="{{ ypjk }}">
|
||||
{% if yjk or y2jk %}
|
||||
<b>{{ xjk }}</b>
|
||||
{% else %}
|
||||
{{ xjk }}
|
||||
{% endif %}
|
||||
</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
<tr>
|
||||
{% for xjk, yp2jk in zip(xj, yp2j) %}
|
||||
<td class="att" color="{{ yp2jk }}">-</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
294
tensorflow/SQuAD/tree/test.ipynb
Normal file
@@ -0,0 +1,294 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(S (PRP I) (VP (VBP am) (NNP Sam)) (. .))\n",
|
||||
"(PRP I)\n",
|
||||
"(VP (VBP am) (NNP Sam))\n",
|
||||
"(VBP am)\n",
|
||||
"(NNP Sam)\n",
|
||||
"(. .)\n",
|
||||
"(S (PRP I) (VP (VBP am) (NNP Sam)) (. .))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"string = \"(ROOT(S(NP (PRP I))(VP (VBP am)(NP (NNP Sam)))(. .)))\"\n",
|
||||
"tree = nltk.tree.Tree.fromstring(string)\n",
|
||||
"\n",
|
||||
"def load_compressed_tree(s):\n",
|
||||
"\n",
|
||||
" def compress_tree(tree):\n",
|
||||
" if len(tree) == 1:\n",
|
||||
" if isinstance(tree[0], nltk.tree.Tree):\n",
|
||||
" return compress_tree(tree[0])\n",
|
||||
" else:\n",
|
||||
" return tree\n",
|
||||
" else:\n",
|
||||
" for i, t in enumerate(tree):\n",
|
||||
" tree[i] = compress_tree(t)\n",
|
||||
" return tree\n",
|
||||
"\n",
|
||||
" return compress_tree(nltk.tree.Tree.fromstring(s))\n",
|
||||
"tree = load_compressed_tree(string)\n",
|
||||
"for t in tree.subtrees():\n",
|
||||
" print(t)\n",
|
||||
" \n",
|
||||
"print(str(tree))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(ROOT I am Sam .)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(tree.flatten())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['ROOT', 'S', 'NP', 'PRP', 'VP', 'VBP', 'NP', 'NNP', '.']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(list(t.label() for t in tree.subtrees()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"d = json.load(open(\"data/squad/shared_dev.json\", 'r'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"73"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(d['pos_counter'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'#': 6,\n",
|
||||
" '$': 80,\n",
|
||||
" \"''\": 1291,\n",
|
||||
" ',': 14136,\n",
|
||||
" '-LRB-': 1926,\n",
|
||||
" '-RRB-': 1925,\n",
|
||||
" '.': 9505,\n",
|
||||
" ':': 1455,\n",
|
||||
" 'ADJP': 3426,\n",
|
||||
" 'ADVP': 4936,\n",
|
||||
" 'CC': 9300,\n",
|
||||
" 'CD': 6216,\n",
|
||||
" 'CONJP': 191,\n",
|
||||
" 'DT': 26286,\n",
|
||||
" 'EX': 288,\n",
|
||||
" 'FRAG': 107,\n",
|
||||
" 'FW': 96,\n",
|
||||
" 'IN': 32564,\n",
|
||||
" 'INTJ': 12,\n",
|
||||
" 'JJ': 21452,\n",
|
||||
" 'JJR': 563,\n",
|
||||
" 'JJS': 569,\n",
|
||||
" 'LS': 7,\n",
|
||||
" 'LST': 1,\n",
|
||||
" 'MD': 1051,\n",
|
||||
" 'NAC': 19,\n",
|
||||
" 'NN': 34750,\n",
|
||||
" 'NNP': 28392,\n",
|
||||
" 'NNPS': 1400,\n",
|
||||
" 'NNS': 16716,\n",
|
||||
" 'NP': 91636,\n",
|
||||
" 'NP-TMP': 236,\n",
|
||||
" 'NX': 108,\n",
|
||||
" 'PDT': 89,\n",
|
||||
" 'POS': 1451,\n",
|
||||
" 'PP': 33278,\n",
|
||||
" 'PRN': 2085,\n",
|
||||
" 'PRP': 2320,\n",
|
||||
" 'PRP$': 1959,\n",
|
||||
" 'PRT': 450,\n",
|
||||
" 'QP': 838,\n",
|
||||
" 'RB': 7611,\n",
|
||||
" 'RBR': 301,\n",
|
||||
" 'RBS': 252,\n",
|
||||
" 'ROOT': 9587,\n",
|
||||
" 'RP': 454,\n",
|
||||
" 'RRC': 19,\n",
|
||||
" 'S': 21557,\n",
|
||||
" 'SBAR': 5009,\n",
|
||||
" 'SBARQ': 6,\n",
|
||||
" 'SINV': 135,\n",
|
||||
" 'SQ': 5,\n",
|
||||
" 'SYM': 17,\n",
|
||||
" 'TO': 5167,\n",
|
||||
" 'UCP': 143,\n",
|
||||
" 'UH': 15,\n",
|
||||
" 'VB': 4197,\n",
|
||||
" 'VBD': 8377,\n",
|
||||
" 'VBG': 3570,\n",
|
||||
" 'VBN': 7218,\n",
|
||||
" 'VBP': 2897,\n",
|
||||
" 'VBZ': 4146,\n",
|
||||
" 'VP': 33696,\n",
|
||||
" 'WDT': 1368,\n",
|
||||
" 'WHADJP': 5,\n",
|
||||
" 'WHADVP': 439,\n",
|
||||
" 'WHNP': 1927,\n",
|
||||
" 'WHPP': 153,\n",
|
||||
" 'WP': 482,\n",
|
||||
" 'WP$': 50,\n",
|
||||
" 'WRB': 442,\n",
|
||||
" 'X': 23,\n",
|
||||
" '``': 1269}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"d['pos_counter']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[False False False False]\n",
|
||||
" [False True False False]\n",
|
||||
" [False False False False]]\n",
|
||||
"[[0 2 2 0]\n",
|
||||
" [2 2 0 2]\n",
|
||||
" [2 0 0 0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from my.nltk_utils import tree2matrix, load_compressed_tree, find_max_f1_subtree, set_span\n",
|
||||
"string = \"(ROOT(S(NP (PRP I))(VP (VBP am)(NP (NNP Sam)))(. .)))\"\n",
|
||||
"tree = load_compressed_tree(string)\n",
|
||||
"span = (1, 3)\n",
|
||||
"set_span(tree)\n",
|
||||
"subtree = find_max_f1_subtree(tree, span)\n",
|
||||
"f = lambda t: t == subtree\n",
|
||||
"g = lambda t: 1 if isinstance(t, str) else 2\n",
|
||||
"a, b = tree2matrix(tree, f, dtype='bool')\n",
|
||||
"c, d = tree2matrix(tree, g, dtype='int32')\n",
|
||||
"print(a)\n",
|
||||
"print(c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
36
tensorflow/SQuAD/tree/trainer.py
Normal file
@@ -0,0 +1,36 @@
import tensorflow as tf

from tree.model import Model


class Trainer(object):
    def __init__(self, config, model):
        assert isinstance(model, Model)
        self.config = config
        self.model = model
        self.opt = tf.train.AdagradOptimizer(config.init_lr)
        self.loss = model.get_loss()
        self.var_list = model.get_var_list()
        self.global_step = model.get_global_step()
        self.ema_op = model.ema_op
        self.summary = model.summary
        self.grads = self.opt.compute_gradients(self.loss, var_list=self.var_list)
        opt_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)

        # Define train op
        with tf.control_dependencies([opt_op]):
            self.train_op = tf.group(self.ema_op)

    def get_train_op(self):
        return self.train_op

    def step(self, sess, batch, get_summary=False):
        assert isinstance(sess, tf.Session)
        feed_dict = self.model.get_feed_dict(batch, True)
        if get_summary:
            loss, summary, train_op = \
                sess.run([self.loss, self.summary, self.train_op], feed_dict=feed_dict)
        else:
            loss, train_op = sess.run([self.loss, self.train_op], feed_dict=feed_dict)
            summary = None
        return loss, summary, train_op
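Note: the control dependency above means that running train_op first applies the Adagrad update and only then the exponential-moving-average op grouped into it. Below is a minimal sketch of how this Trainer might be driven, assuming a TF 0.x-era session API consistent with this commit; the import paths, config/model construction, and get_batches helper are placeholders, not part of this commit.

# Hypothetical driver loop (sketch only; config, model, and get_batches are assumed).
import tensorflow as tf

from tree.model import Model
from tree.trainer import Trainer  # module path assumed from the file layout

def train_loop(config, model, get_batches, num_steps):
    trainer = Trainer(config, model)
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())  # TF 0.x-era initializer
        for step, batch in enumerate(get_batches(num_steps), start=1):
            # Each step runs the loss, the Adagrad update, and the EMA update.
            loss, _, _ = trainer.step(sess, batch, get_summary=False)
            if step % 100 == 0:
                print("step %d: loss = %.4f" % (step, loss))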
122
tensorflow/SQuAD/tree/visualizer.py
Normal file
@@ -0,0 +1,122 @@
import shutil
from collections import OrderedDict
import http.server
import socketserver
import argparse
import json
import os
import numpy as np
from tqdm import tqdm

from jinja2 import Environment, FileSystemLoader


def bool_(string):
    if string == 'True':
        return True
    elif string == 'False':
        return False
    else:
        raise Exception()

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default='basic')
    parser.add_argument("--data_type", type=str, default='dev')
    parser.add_argument("--step", type=int, default=5000)
    parser.add_argument("--template_name", type=str, default="visualizer.html")
    parser.add_argument("--num_per_page", type=int, default=100)
    parser.add_argument("--data_dir", type=str, default="data/squad")
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--open", type=str, default='False')
    parser.add_argument("--run_id", type=str, default="0")

    args = parser.parse_args()
    return args


def _decode(decoder, sent):
    return " ".join(decoder[idx] for idx in sent)


def accuracy2_visualizer(args):
    model_name = args.model_name
    data_type = args.data_type
    num_per_page = args.num_per_page
    data_dir = args.data_dir
    run_id = args.run_id.zfill(2)
    step = args.step

    eval_path = os.path.join("out", model_name, run_id, "eval", "{}-{}.json".format(data_type, str(step).zfill(6)))
    eval_ = json.load(open(eval_path, 'r'))

    _id = 0
    html_dir = "/tmp/list_results%d" % _id
    while os.path.exists(html_dir):
        _id += 1
        html_dir = "/tmp/list_results%d" % _id

    if os.path.exists(html_dir):
        shutil.rmtree(html_dir)
    os.mkdir(html_dir)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    templates_dir = os.path.join(cur_dir, 'templates')
    env = Environment(loader=FileSystemLoader(templates_dir))
    env.globals.update(zip=zip, reversed=reversed)
    template = env.get_template(args.template_name)

    data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
    shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
    data = json.load(open(data_path, 'r'))
    shared = json.load(open(shared_path, 'r'))

    rows = []
    for i, (idx, yi, ypi) in enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp')])):
        id_, q, rx = (data[key][idx] for key in ('ids', 'q', '*x'))
        x = shared['x'][rx[0]][rx[1]]
        ques = [" ".join(q)]
        para = [[word for word in sent] for sent in x]
        row = {
            'id': id_,
            'title': "Hello world!",
            'ques': ques,
            'para': para,
            'y': yi,
            'y2': yi,
            'yp': ypi,
            'yp2': ypi,
            'a': ""
        }
        rows.append(row)

        if i % num_per_page == 0:
            html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))

        if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
            var_dict = {'title': "Accuracy Visualization",
                        'rows': rows
                        }
            with open(html_path, "wb") as f:
                f.write(template.render(**var_dict).encode('UTF-8'))
            rows = []

    os.chdir(html_dir)
    port = args.port
    host = args.host
    # Overriding to suppress log message
    class MyHandler(http.server.SimpleHTTPRequestHandler):
        def log_message(self, format, *args):
            pass
    handler = MyHandler
    httpd = socketserver.TCPServer((host, port), handler)
    if args.open == 'True':
        os.system("open http://%s:%d" % (args.host, args.port))
    print("serving at %s:%d" % (host, port))
    httpd.serve_forever()


if __name__ == "__main__":
    ARGS = get_args()
    accuracy2_visualizer(ARGS)
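Note: the visualizer writes one HTML page per num_per_page examples and serves the output directory with http.server. The actual templates/visualizer.html is not part of this hunk; the sketch below renders the same var_dict ({'title': ..., 'rows': ...}) through an in-memory Jinja2 template purely to illustrate the row fields (id, ques, para, y, yp) a compatible template would consume. The markup itself is an assumption, not the repository's template.

# Hypothetical stand-in for templates/visualizer.html; markup is assumed.
from jinja2 import Environment

TEMPLATE = """<html><body><h1>{{ title }}</h1>
{% for row in rows %}
<div>
  <b>{{ row.id }}</b> Q: {{ row.ques[0] }} (gold: {{ row.y }}, predicted: {{ row.yp }})
  <p>{% for sent in row.para %}{{ sent|join(" ") }}<br/>{% endfor %}</p>
</div>
{% endfor %}
</body></html>"""

template = Environment().from_string(TEMPLATE)
# One fabricated row with the same keys the visualizer emits above.
rows = [{'id': 'q-0001', 'title': "Hello world!", 'ques': ['What is X ?'],
         'para': [['X', 'is', 'Y', '.']], 'y': 0, 'y2': 0, 'yp': 0, 'yp2': 0, 'a': ""}]
print(template.render(title="Accuracy Visualization", rows=rows))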
244
tensorflow/SQuAD/visualization/compare_models.py
Normal file
@@ -0,0 +1,244 @@
import numpy as np
from collections import Counter
import string
import re
import argparse
import os
import json
import nltk
from matplotlib_venn import venn2
from matplotlib import pyplot as plt


class Question:
    def __init__(self, id, question_text, ground_truth, model_names):
        self.id = id
        self.question_text = self.normalize_answer(question_text)
        self.question_head_ngram = []
        self.question_tokens = nltk.word_tokenize(self.question_text)
        for nc in range(3):
            self.question_head_ngram.append(' '.join(self.question_tokens[0:nc]))
        self.ground_truth = ground_truth
        self.model_names = model_names
        self.em = np.zeros(2)
        self.f1 = np.zeros(2)
        self.answer_text = []

    def add_answers(self, answer_model_1, answer_model_2):
        self.answer_text.append(answer_model_1)
        self.answer_text.append(answer_model_2)
        self.eval()

    def eval(self):
        for model_count in range(2):
            self.em[model_count] = self.metric_max_over_ground_truths(self.exact_match_score, self.answer_text[model_count], self.ground_truth)
            self.f1[model_count] = self.metric_max_over_ground_truths(self.f1_score, self.answer_text[model_count], self.ground_truth)

    def normalize_answer(self, s):
        """Lower text and remove punctuation, articles and extra whitespace."""
        def remove_articles(text):
            return re.sub(r'\b(a|an|the)\b', ' ', text)

        def white_space_fix(text):
            return ' '.join(text.split())

        def remove_punc(text):
            exclude = set(string.punctuation)
            return ''.join(ch for ch in text if ch not in exclude)

        def lower(text):
            return text.lower()

        return white_space_fix(remove_articles(remove_punc(lower(s))))

    def f1_score(self, prediction, ground_truth):
        prediction_tokens = self.normalize_answer(prediction).split()
        ground_truth_tokens = self.normalize_answer(ground_truth).split()
        common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
        num_same = sum(common.values())
        if num_same == 0:
            return 0
        precision = 1.0 * num_same / len(prediction_tokens)
        recall = 1.0 * num_same / len(ground_truth_tokens)
        f1 = (2 * precision * recall) / (precision + recall)
        return f1

    def exact_match_score(self, prediction, ground_truth):
        return (self.normalize_answer(prediction) == self.normalize_answer(ground_truth))

    def metric_max_over_ground_truths(self, metric_fn, prediction, ground_truths):
        scores_for_ground_truths = []
        for ground_truth in ground_truths:
            score = metric_fn(prediction, ground_truth)
            scores_for_ground_truths.append(score)
        return max(scores_for_ground_truths)


def safe_dict_access(in_dict, in_key, default_string='some junk string'):
    if in_key in in_dict:
        return in_dict[in_key]
    else:
        return default_string


def aggregate_metrics(questions):
    total = len(questions)
    exact_match = np.zeros(2)
    f1_scores = np.zeros(2)

    for mc in range(2):
        exact_match[mc] = 100 * np.sum(np.array([questions[x].em[mc] for x in questions])) / total
        f1_scores[mc] = 100 * np.sum(np.array([questions[x].f1[mc] for x in questions])) / total

    model_names = questions[list(questions.keys())[0]].model_names
    print('\nAggregate Scores:')
    for model_count in range(2):
        print('Model {0} EM = {1:.2f}'.format(model_names[model_count], exact_match[model_count]))
        print('Model {0} F1 = {1:.2f}'.format(model_names[model_count], f1_scores[model_count]))


def venn_diagram(questions, output_dir):
    em_model1_ids = [x for x in questions if questions[x].em[0] == 1]
    em_model2_ids = [x for x in questions if questions[x].em[1] == 1]
    model_names = questions[list(questions.keys())[0]].model_names
    print('\nVenn diagram')

    correct_model1 = em_model1_ids
    correct_model2 = em_model2_ids
    correct_model1_and_model2 = list(set(em_model1_ids).intersection(set(em_model2_ids)))
    correct_model1_and_not_model2 = list(set(em_model1_ids) - set(em_model2_ids))
    correct_model2_and_not_model1 = list(set(em_model2_ids) - set(em_model1_ids))

    print('{0} answers correctly = {1}'.format(model_names[0], len(correct_model1)))
    print('{0} answers correctly = {1}'.format(model_names[1], len(correct_model2)))
    print('Both answer correctly = {0}'.format(len(correct_model1_and_model2)))
    print('{0} correct & {1} incorrect = {2}'.format(model_names[0], model_names[1], len(correct_model1_and_not_model2)))
    print('{0} correct & {1} incorrect = {2}'.format(model_names[1], model_names[0], len(correct_model2_and_not_model1)))

    plt.clf()
    venn_diagram_plot = venn2(
        subsets=(len(correct_model1_and_not_model2), len(correct_model2_and_not_model1), len(correct_model1_and_model2)),
        set_labels=('{0} correct'.format(model_names[0]), '{0} correct'.format(model_names[1]), 'Both correct'),
        set_colors=('r', 'b'),
        alpha=0.3,
        normalize_to=1
    )
    plt.savefig(os.path.join(output_dir, 'venn_diagram.png'))
    plt.close()
    return correct_model1, correct_model2, correct_model1_and_model2, correct_model1_and_not_model2, correct_model2_and_not_model1


def get_head_ngrams(questions, num_grams):
    head_ngrams = []
    for question in questions.values():
        head_ngrams.append(question.question_head_ngram[num_grams])
    return head_ngrams


def get_head_ngram_frequencies(questions, head_ngrams, num_grams):
    head_ngram_frequencies = {}
    for current_ngram in head_ngrams:
        head_ngram_frequencies[current_ngram] = 0
    for question in questions.values():
        head_ngram_frequencies[question.question_head_ngram[num_grams]] += 1
    return head_ngram_frequencies


def get_head_ngram_statistics(questions, correct_model1, correct_model2, correct_model1_and_model2, correct_model1_and_not_model2, correct_model2_and_not_model1, output_dir, num_grams=2, top_count=25):
    # Head ngram statistics
    head_ngrams = get_head_ngrams(questions, num_grams)

    # Get head_ngram_frequencies (hnf)
    hnf_all = get_head_ngram_frequencies(questions, head_ngrams, num_grams)
    hnf_correct_model1 = get_head_ngram_frequencies({qid: questions[qid] for qid in correct_model1}, head_ngrams, num_grams)
    hnf_correct_model2 = get_head_ngram_frequencies({qid: questions[qid] for qid in correct_model2}, head_ngrams, num_grams)
    hnf_correct_model1_and_model2 = get_head_ngram_frequencies({qid: questions[qid] for qid in correct_model1_and_model2}, head_ngrams, num_grams)
    hnf_correct_model1_and_not_model2 = get_head_ngram_frequencies({qid: questions[qid] for qid in correct_model1_and_not_model2}, head_ngrams, num_grams)
    hnf_correct_model2_and_not_model1 = get_head_ngram_frequencies({qid: questions[qid] for qid in correct_model2_and_not_model1}, head_ngrams, num_grams)

    sorted_bigrams_all = sorted(hnf_all.items(), key=lambda x: x[1], reverse=True)
    top_bigrams = [x[0] for x in sorted_bigrams_all[0:top_count]]

    counts_total = [hnf_all[x] for x in top_bigrams]
    counts_model1 = [hnf_correct_model1[x] for x in top_bigrams]
    counts_model2 = [hnf_correct_model2[x] for x in top_bigrams]
    counts_model1_and_model2 = [hnf_correct_model1_and_model2[x] for x in top_bigrams]
    counts_model1_and_not_model2 = [hnf_correct_model1_and_not_model2[x] for x in top_bigrams]
    counts_model2_and_not_model1 = [hnf_correct_model2_and_not_model1[x] for x in top_bigrams]

    top_bigrams_with_counts = []
    for cc in range(len(top_bigrams)):
        top_bigrams_with_counts.append('{0} ({1})'.format(top_bigrams[cc], counts_total[cc]))

    plt.clf()
    fig, ax = plt.subplots(figsize=(6, 10))

    ylocs = list(range(top_count))
    counts_model1_percent = 100 * np.array(counts_model1) / np.array(counts_total)
    plt.barh([top_count - x for x in ylocs], counts_model1_percent, height=0.4, alpha=0.5, color='#EE3224', label=top_bigrams)
    counts_model2_percent = 100 * np.array(counts_model2) / np.array(counts_total)
    plt.barh([top_count - x + 0.4 for x in ylocs], counts_model2_percent, height=0.4, alpha=0.5, color='#2432EE', label=top_bigrams)
    ax.set_yticks([top_count - x + 0.4 for x in ylocs])
    ax.set_yticklabels(top_bigrams_with_counts)
    ax.set_ylim([0.5, top_count + 1])
    ax.set_xlim([0, 100])
    plt.subplots_adjust(left=0.28, right=0.9, top=0.9, bottom=0.1)
    plt.xlabel('Percentage of questions with correct answers')
    plt.ylabel('Top N-grams')
    plt.savefig(os.path.join(output_dir, 'ngram_stats_{0}.png'.format(num_grams)))
    plt.close()


def read_json(filename):
    with open(filename) as filepoint:
        data = json.load(filepoint)
    return data


def compare_models(dataset_file, predictions_m1_file, predictions_m2_file, output_dir, name_m1='Model 1', name_m2='Model 2'):
    dataset = read_json(dataset_file)['data']
    predictions_m1 = read_json(predictions_m1_file)
    predictions_m2 = read_json(predictions_m2_file)

    # Read in data
    total = 0
    questions = {}
    for article in dataset:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                current_question = Question(id=qa['id'], question_text=qa['question'], ground_truth=list(map(lambda x: x['text'], qa['answers'])), model_names=[name_m1, name_m2])
                current_question.add_answers(answer_model_1=safe_dict_access(predictions_m1, qa['id']), answer_model_2=safe_dict_access(predictions_m2, qa['id']))
                questions[current_question.id] = current_question
                total += 1
    model_names = questions[list(questions.keys())[0]].model_names
    print('Read in {0} questions'.format(total))

    # Aggregate scores
    aggregate_metrics(questions)

    # Venn diagram
    correct_model1, correct_model2, correct_model1_and_model2, correct_model1_and_not_model2, correct_model2_and_not_model1 = venn_diagram(questions, output_dir=output_dir)

    # Head Unigram statistics
    get_head_ngram_statistics(questions, correct_model1, correct_model2, correct_model1_and_model2, correct_model1_and_not_model2,
                              correct_model2_and_not_model1, output_dir, num_grams=1, top_count=10)

    # Head Bigram statistics
    get_head_ngram_statistics(questions, correct_model1, correct_model2, correct_model1_and_model2, correct_model1_and_not_model2,
                              correct_model2_and_not_model1, output_dir, num_grams=2, top_count=10)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Compare two QA models')
    parser.add_argument('-dataset', action='store', dest='dataset', required=True, help='Dataset file')
    parser.add_argument('-model1', action='store', dest='predictions_m1', required=True, help='Prediction file for model 1')
    parser.add_argument('-model2', action='store', dest='predictions_m2', required=True, help='Prediction file for model 2')
    parser.add_argument('-name1', action='store', dest='name_m1', help='Name for model 1')
    parser.add_argument('-name2', action='store', dest='name_m2', help='Name for model 2')
    parser.add_argument('-output', action='store', dest='output_dir', help='Output directory for visualizations')
    results = parser.parse_args()

    if results.name_m1 is not None and results.name_m2 is not None:
        compare_models(dataset_file=results.dataset, predictions_m1_file=results.predictions_m1, predictions_m2_file=results.predictions_m2, output_dir=results.output_dir, name_m1=results.name_m1, name_m2=results.name_m2)
    else:
        compare_models(dataset_file=results.dataset, predictions_m1_file=results.predictions_m1, predictions_m2_file=results.predictions_m2, output_dir=results.output_dir)
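Note: an illustrative programmatic use of compare_models, equivalent to running the script with the -dataset/-model1/-model2/-output flags defined above. The import path and all file paths below are placeholders assumed for the sketch, not prescribed by this commit.

# Hypothetical invocation; paths and import path are assumptions.
from visualization.compare_models import compare_models

compare_models(
    dataset_file='data/squad/dev-v1.1.json',                          # SQuAD dev set (path assumed)
    predictions_m1_file='out/basic/00/answer/dev-predictions.json',   # placeholder prediction file
    predictions_m2_file='out/tree/00/answer/dev-predictions.json',    # placeholder prediction file
    output_dir='/tmp/compare_basic_vs_tree',
    name_m1='basic',
    name_m2='tree',
)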