Less strict evaluation

94361772 · Frederik Arnold · cae5dd9b · 94361772
Commit 94361772 authored 4 years ago by Frederik Arnold
--- a/evaluation/Test2.py
+++ b/evaluation/Test2.py
@@ -14,6 +14,9 @@ class GoldItem:
        self.text = text
        self.item_type = item_type

+    def __hash__(self) -> int:
+        return hash((self.start, self.end, self.text))
+
    def __str__(self):
        return "Gold Item (" + str(self.start) + ":" + str(self.end) + ", " + self.text + ")"

@@ -40,6 +43,8 @@ scientific_path = args.scientific_work_path
 gold_path = args.gold_path
 output_path = args.output_path

+eval_strict = True
+

 def process_file(queue, literature_content, scientific_file, gold_file, pos):
    filename = splitext(basename(scientific_file))[0]
@@ -77,34 +82,73 @@ def process_file(queue, literature_content, scientific_file, gold_file, pos):
    num_matches = len(similarities)
    true_positives_count = 0

-    found_gold_items = []
-    found_false_positives = []
+    found_gold_items = set()
+    found_false_positives = set()
+
+    non_lit1_quotes_count = 0

    for i in range(0, num_matches):
        start = similarities[i][1].character_start_pos
        end = similarities[i][1].character_end_pos

-        found = False
-
-        for gold_item in gold_items:
-            if gold_item.start - 20 <= start <= gold_item.start + 20 and gold_item.end - 20 <= end <= gold_item.end + 20:
-                found = True
-                true_positives_count += 1
-                found_gold_items.append(gold_item)
-                print_line('found: ' + str(i + 1) + ', gold item: ' + str(gold_item), output_file)
-
-        if not found:
-            found_false_positives.append(i)
+        if eval_strict:
+            found = False
+
+            for gold_item in gold_items:
+                if gold_item.start - 20 <= start <= gold_item.start + 20 and gold_item.end - 20 <= end <= gold_item.end + 20:
+                    if gold_item.item_type == 0:
+                        found = True
+                        true_positives_count += 1
+                        found_gold_items.add(gold_item)
+                        print_line('found: ' + str(i + 1) + ', gold item: ' + str(gold_item), output_file)
+                        break
+
+            if not found:
+                found_false_positives.add((i, 1.0))
+        else:
+            found = False
+            best_percentage = 0
+
+            item_length = end - start
+            max_extra = min(100, item_length)
+
+            for gold_item in gold_items:
+                if gold_item.start - max_extra <= start and end <= gold_item.end + max_extra:
+                    overlap_start = max(start, gold_item.start)
+                    overlap_end = min(end, gold_item.end)
+                    overlap_length = overlap_end - overlap_start
+                    percentage = overlap_length / item_length
+
+                    if percentage > 0.5:
+                        if gold_item.item_type == 0:
+                            found = True
+                            true_positives_count += 1
+                            found_gold_items.add(gold_item)
+                            print_line('found: ' + str(i + 1) + ' (' + str(percentage) + ')' + ', gold item: ' + str(gold_item), output_file)
+                        else:
+                            non_lit1_quotes_count += 1
+                            found = True
+                            print_line('found non lit1: ' + str(i + 1) + ' (' + str(percentage) + ')' + ', gold item: ' + str(gold_item), output_file)
+                            break
+
+                    if percentage > best_percentage:
+                        best_percentage = percentage
+
+            if not found:
+                found_false_positives.add((i, best_percentage))

    print_line('\nFalse positives:', output_file)

-    for i in found_false_positives:
-        start_lit = similarities[i][0].character_start_pos
-        end_lit = similarities[i][0].character_end_pos
-        start_int = similarities[i][1].character_start_pos
-        end_int = similarities[i][1].character_end_pos
+    for false_positive in found_false_positives:
+        pos = false_positive[0]
+        percentage = false_positive[1]
+
+        start_lit = similarities[pos][0].character_start_pos
+        end_lit = similarities[pos][0].character_end_pos
+        start_int = similarities[pos][1].character_start_pos
+        end_int = similarities[pos][1].character_end_pos

-        print_line('false positive: ' + str(i + 1) + ': lit (' + str(start_lit) + ':' + str(end_lit) + '), int (' + str(
+        print_line('false positive: ' + str(pos + 1) + ' (' + str(percentage) + ')' + ': lit (' + str(start_lit) + ':' + str(end_lit) + '), int (' + str(
            start_int) + ':' + str(end_int) + '): ' + scientific_content[start_int:end_int], output_file)

    print_line('\nFalse negatives:', output_file)
@@ -117,10 +161,10 @@ def process_file(queue, literature_content, scientific_file, gold_file, pos):
    recall = 0

    if num_matches > 0:
-        precision = true_positives_count / num_matches
+        precision = true_positives_count / (num_matches - non_lit1_quotes_count)

    if num_gold_items > 0:
-        recall = true_positives_count / num_gold_items
+        recall = len(found_gold_items) / num_gold_items

    result = Result(pos, filename, num_matches, num_gold_items, precision, recall, true_positives_count)