# dawn-bench-models/tensorflow/SQuAD/tree/evaluator.py

import numpy as np
import tensorflow as tf
from tree.read_data import DataSet
from my.nltk_utils import span_f1

class Evaluation(object):
    """Accumulates per-batch predictions; instances merge via + and sum()."""

    def __init__(self, data_type, global_step, idxs, yp):
        self.data_type = data_type
        self.global_step = global_step
        self.idxs = idxs
        self.yp = yp
        self.num_examples = len(yp)
        self.dict = {'data_type': data_type,
                     'global_step': global_step,
                     'yp': yp,
                     'idxs': idxs,
                     'num_examples': self.num_examples}
        self.summaries = None

    def __repr__(self):
        return "{} step {}".format(self.data_type, self.global_step)

    def __add__(self, other):
        # sum() starts from 0, so treat 0 as the identity element.
        if other == 0:
            return self
        assert self.data_type == other.data_type
        assert self.global_step == other.global_step
        new_yp = self.yp + other.yp
        new_idxs = self.idxs + other.idxs
        return Evaluation(self.data_type, self.global_step, new_idxs, new_yp)

    def __radd__(self, other):
        return self.__add__(other)
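
# Illustrative sketch, not part of the original file: __add__ treats 0 as an
# identity and __radd__ delegates to it so that Python's sum(), which starts
# from 0, can merge a list of per-batch Evaluation objects in one call.
# The example data below is made up.
def _demo_sum_evaluations():
    e1 = Evaluation('dev', 100, [0, 1], [[0.9, 0.1], [0.2, 0.8]])
    e2 = Evaluation('dev', 100, [2], [[0.6, 0.4]])
    merged = sum([e1, e2])  # 0 + e1 hits __radd__, then e1 + e2 hits __add__
    assert merged.num_examples == 3
    assert merged.idxs == [0, 1, 2]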

class LabeledEvaluation(Evaluation):
    def __init__(self, data_type, global_step, idxs, yp, y):
        super(LabeledEvaluation, self).__init__(data_type, global_step, idxs, yp)
        self.y = y
        self.dict['y'] = y

    def __add__(self, other):
        if other == 0:
            return self
        assert self.data_type == other.data_type
        assert self.global_step == other.global_step
        new_yp = self.yp + other.yp
        new_y = self.y + other.y
        new_idxs = self.idxs + other.idxs
        return LabeledEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y)

class AccuracyEvaluation(LabeledEvaluation):
    """Labeled evaluation carrying per-example correctness, accuracy, and loss."""

    def __init__(self, data_type, global_step, idxs, yp, y, correct, loss):
        super(AccuracyEvaluation, self).__init__(data_type, global_step, idxs, yp, y)
        self.loss = loss
        self.correct = correct
        self.acc = sum(correct) / len(correct)
        self.dict['loss'] = loss
        self.dict['correct'] = correct
        self.dict['acc'] = self.acc
        loss_summary = tf.Summary(value=[tf.Summary.Value(tag='dev/loss', simple_value=self.loss)])
        acc_summary = tf.Summary(value=[tf.Summary.Value(tag='dev/acc', simple_value=self.acc)])
        self.summaries = [loss_summary, acc_summary]

    def __repr__(self):
        return "{} step {}: accuracy={}, loss={}".format(self.data_type, self.global_step, self.acc, self.loss)

    def __add__(self, other):
        if other == 0:
            return self
        assert self.data_type == other.data_type
        assert self.global_step == other.global_step
        new_idxs = self.idxs + other.idxs
        new_yp = self.yp + other.yp
        new_y = self.y + other.y
        new_correct = self.correct + other.correct
        # Average the losses weighted by how many examples each side saw.
        new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
        return AccuracyEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_y, new_correct, new_loss)
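
# Hedged sketch, not in the original file: merging two AccuracyEvaluations
# averages their losses weighted by example count, so batches of different
# sizes contribute proportionally. With made-up numbers:
# (0.5 * 2 + 1.0 * 1) / 3 == 2/3.
def _demo_weighted_loss_merge():
    a = AccuracyEvaluation('dev', 1, [0, 1], [[1, 0], [0, 1]], [[1, 0], [0, 1]], [1, 1], 0.5)
    b = AccuracyEvaluation('dev', 1, [2], [[1, 0]], [[1, 0]], [1], 1.0)
    merged = a + b
    assert abs(merged.loss - (0.5 * 2 + 1.0 * 1) / 3) < 1e-9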

class Evaluator(object):
    """Runs the model on batches and wraps the outputs in Evaluation objects."""

    def __init__(self, config, model):
        self.config = config
        self.model = model

    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
        global_step, yp = sess.run([self.model.global_step, self.model.yp], feed_dict=feed_dict)
        # Trim model output to the number of real examples in this batch.
        yp = yp[:data_set.num_examples]
        e = Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist())
        return e

    def get_evaluation_from_batches(self, sess, batches):
        e = sum(self.get_evaluation(sess, batch) for batch in batches)
        return e

class LabeledEvaluator(Evaluator):
    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        feed_dict = self.model.get_feed_dict(data_set, False, supervised=False)
        global_step, yp = sess.run([self.model.global_step, self.model.yp], feed_dict=feed_dict)
        yp = yp[:data_set.num_examples]
        y = feed_dict[self.model.y]
        e = LabeledEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y.tolist())
        return e

class AccuracyEvaluator(LabeledEvaluator):
    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        assert isinstance(data_set, DataSet)
        feed_dict = self.model.get_feed_dict(data_set, False)
        global_step, yp, loss = sess.run([self.model.global_step, self.model.yp, self.model.loss], feed_dict=feed_dict)
        y = feed_dict[self.model.y]
        yp = yp[:data_set.num_examples]
        correct = [self.__class__.compare(yi, ypi) for yi, ypi in zip(y, yp)]
        e = AccuracyEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), y.tolist(), correct, float(loss))
        return e

    @staticmethod
    def compare(yi, ypi):
        # A prediction is correct when its argmax matches the label's argmax.
        return int(np.argmax(yi)) == int(np.argmax(ypi))

class AccuracyEvaluator2(AccuracyEvaluator):
    @staticmethod
    def compare(yi, ypi):
        # Flatten first so multi-dimensional targets compare by flat index.
        i = int(np.argmax(yi.flatten()))
        j = int(np.argmax(ypi.flatten()))
        return i == j
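
# Sketch, not in the original file: flattening before argmax lets compare()
# handle 2-D targets, e.g. a (num_sentences, sentence_length) position
# distribution, by comparing flat indices of the maxima.
def _demo_flat_argmax_compare():
    yi = np.array([[0, 0], [1, 0]])            # true max at flat index 2
    ypi = np.array([[0.1, 0.2], [0.6, 0.1]])   # predicted max at flat index 2
    assert AccuracyEvaluator2.compare(yi, ypi)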

class TempEvaluation(AccuracyEvaluation):
    """Adds span-end predictions (yp2/y2) and per-example span F1 scores."""

    def __init__(self, data_type, global_step, idxs, yp, yp2, y, y2, correct, loss, f1s):
        super(TempEvaluation, self).__init__(data_type, global_step, idxs, yp, y, correct, loss)
        self.y2 = y2
        self.yp2 = yp2
        self.f1s = f1s
        self.f1 = float(np.mean(f1s))
        self.dict['y2'] = y2
        self.dict['yp2'] = yp2
        self.dict['f1s'] = f1s
        self.dict['f1'] = self.f1
        f1_summary = tf.Summary(value=[tf.Summary.Value(tag='dev/f1', simple_value=self.f1)])
        self.summaries.append(f1_summary)

    def __add__(self, other):
        if other == 0:
            return self
        assert self.data_type == other.data_type
        assert self.global_step == other.global_step
        new_idxs = self.idxs + other.idxs
        new_yp = self.yp + other.yp
        new_yp2 = self.yp2 + other.yp2
        new_y = self.y + other.y
        new_y2 = self.y2 + other.y2
        new_correct = self.correct + other.correct
        new_f1s = self.f1s + other.f1s
        new_loss = (self.loss * self.num_examples + other.loss * other.num_examples) / len(new_correct)
        return TempEvaluation(self.data_type, self.global_step, new_idxs, new_yp, new_yp2, new_y, new_y2, new_correct, new_loss, new_f1s)

class TempEvaluator(LabeledEvaluator):
    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        assert isinstance(data_set, DataSet)
        feed_dict = self.model.get_feed_dict(data_set, False)
        global_step, yp, yp2, loss = sess.run([self.model.global_step, self.model.yp, self.model.yp2, self.model.loss], feed_dict=feed_dict)
        y, y2 = feed_dict[self.model.y], feed_dict[self.model.y2]
        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        correct = [self.__class__.compare(yi, y2i, ypi, yp2i) for yi, y2i, ypi, yp2i in zip(y, y2, yp, yp2)]
        f1s = [self.__class__.span_f1(yi, y2i, ypi, yp2i) for yi, y2i, ypi, yp2i in zip(y, y2, yp, yp2)]
        e = TempEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y.tolist(), y2.tolist(), correct, float(loss), f1s)
        return e

    @staticmethod
    def compare(yi, y2i, ypi, yp2i):
        # Exact match: both the start and the end argmax must agree.
        i = int(np.argmax(yi.flatten()))
        j = int(np.argmax(ypi.flatten()))
        k = int(np.argmax(y2i.flatten()))
        l = int(np.argmax(yp2i.flatten()))
        return i == j and k == l

    @staticmethod
    def span_f1(yi, y2i, ypi, yp2i):
        # Spans are half-open (start, end + 1) token intervals.
        true_span = (np.argmax(yi.flatten()), np.argmax(y2i.flatten()) + 1)
        pred_span = (np.argmax(ypi.flatten()), np.argmax(yp2i.flatten()) + 1)
        # Resolves to the imported my.nltk_utils.span_f1 (class scope does not
        # shadow the module-level name inside a method body).
        f1 = span_f1(true_span, pred_span)
        return f1
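
# Hedged sketch, not in the original file: TempEvaluator.span_f1 encodes both
# answers as half-open (start, end + 1) token intervals before delegating to
# my.nltk_utils.span_f1. Assuming that helper scores token-level overlap, the
# arithmetic looks like this: true span (2, 5) vs. predicted span (3, 6)
# share tokens {3, 4}, so precision = recall = 2/3 and F1 = 2/3.
def _demo_span_overlap_f1(true_span=(2, 5), pred_span=(3, 6)):
    true_tokens = set(range(*true_span))
    pred_tokens = set(range(*pred_span))
    overlap = len(true_tokens & pred_tokens)
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(true_tokens)
    return 2 * precision * recall / (precision + recall)  # 2/3 for the defaults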