From ea17109ecd7570cc0a50fc4f55c4bb15e89cb746 Mon Sep 17 00:00:00 2001
From: Frederik Arnold <frederik.arnold@hu-berlin.de>
Date: Fri, 21 Mar 2025 14:36:02 +0100
Subject: [PATCH] Refactor CLI

Split the 'compare' command into 'candidate', 'scene', 'full' and 'sum'
subcommands, rename BasePredictor to BaseCandidatePredictor, introduce
BaseScenePredictor with a ScenePredictorDummy implementation, switch the
CLI output from print() to the logging module, drop nargs=1 from the
positional arguments so they parse as plain strings, and update the
dependencies.

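As a rough sketch of the new interface (the paths below are placeholders,
and top-level options not touched by this patch, e.g. --log-level, are
assumed to keep their defaults), the 'compare candidate' subcommand can be
driven through the module's main() entry point:

    from indiquo.cli.IndiQuoCLI import main

    # Placeholder inputs: a DraCor XML drama, a target text and a
    # fine-tuned candidate model are expected here.
    main([
        'compare', 'candidate',
        'drama.xml',            # source-file-path
        'interpretation.txt',   # target-path
        'models/candidate',     # candidate-model-folder-path
        'out',                  # output-folder-path
        '--model-type', 'iq',
    ])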

---
 README.md                                     |   5 +-
 indiquo/cli/IndiQuoCLI.py                     | 163 ++++++++++++------
 ...Predictor.py => BaseCandidatePredictor.py} |   2 +-
 indiquo/core/BaseScenePredictor.py            |  10 ++
 indiquo/core/CandidatePredictor.py            |   5 +-
 indiquo/core/CandidatePredictorDummy.py       |   7 +-
 indiquo/core/CandidatePredictorRW.py          |  15 +-
 indiquo/core/CandidatePredictorST.py          |   4 +-
 indiquo/core/IndiQuo.py                       |   6 +-
 indiquo/core/IndiQuoBase.py                   |   1 -
 indiquo/core/IndiQuoSum.py                    |   4 -
 indiquo/core/ScenePredictor.py                |   4 +-
 indiquo/core/ScenePredictorDummy.py           |  14 ++
 pyproject.toml                                |  12 +-
 requirements.txt                              |  14 +-
 test/TestSentenceChunker.py                   |   4 +-
 16 files changed, 175 insertions(+), 95 deletions(-)
 rename indiquo/core/{BasePredictor.py => BaseCandidatePredictor.py} (85%)
 create mode 100644 indiquo/core/BaseScenePredictor.py
 create mode 100644 indiquo/core/ScenePredictorDummy.py

diff --git a/README.md b/README.md
index bd106ae..9d4d973 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,8 @@ This repository contains the tool `IndiQuo` for the detection of indirect quotat
 between dramas from [DraCor](https://dracor.org) and scholarly works which interpret the drama.
 
 ## Installation
-
-Checkout this repository and then run:
-
 ~~~
-pip install -r requirements.txt
+pip install indiquo
 ~~~
 
 ### Dependencies
diff --git a/indiquo/cli/IndiQuoCLI.py b/indiquo/cli/IndiQuoCLI.py
index 748a2be..8daa9e0 100644
--- a/indiquo/cli/IndiQuoCLI.py
+++ b/indiquo/cli/IndiQuoCLI.py
@@ -1,3 +1,4 @@
+import argparse
 import logging
 import sys
 from argparse import ArgumentParser, BooleanOptionalAction
@@ -10,6 +11,7 @@ from indiquo.core.CandidatePredictorDummy import CandidatePredictorDummy
 from indiquo.core.CandidatePredictorSum import CandidatePredictorSum
 from indiquo.core.IndiQuoBase import IndiQuoBase
 from indiquo.core.IndiQuoSum import IndiQuoSum
+from indiquo.core.ScenePredictorDummy import ScenePredictorDummy
 from indiquo.training.scene import TrainSceneIdentification
 
 try:
@@ -30,6 +32,9 @@ import csv
 from indiquo.training.candidate import TrainCandidateClassifier, TrainCandidateClassifierST
 
 
+logger = logging.getLogger(__name__)
+
+
 def __train_candidate(train_folder_path, output_folder_path, model_name):
     TrainCandidateClassifier.train(train_folder_path, output_folder_path, model_name)
 
@@ -43,7 +48,7 @@ def __train_scene(train_folder_path, output_folder_path, model_name):
 
 
 def __process_file(indi_quo: IndiQuoBase, filename, target_text, output_folder_path):
-    print(f'Processing {filename} ...')
+    logger.info(f'Processing {filename} ...')
 
     matches = indi_quo.compare(target_text)
 
@@ -64,32 +69,44 @@ def __process_file(indi_quo: IndiQuoBase, filename, target_text, output_folder_p
             writer.writerow([m.target_start, m.target_end, speech_text, m.score, scene_predictions])
 
 
-def __run_compare(source_file_path, target_path, candidate_model_path, scene_model_path,
-                  output_folder_path, approach, add_context, max_candidate_length, summaries_file_path):
+def __run_compare(compare_approach, model_type, source_file_path, target_path, candidate_model_path, scene_model_path,
+                  output_folder_path, add_context, max_candidate_length, summaries_file_path):
     drama_processor = Dramatist()
     drama = drama_processor.from_file(source_file_path)
     sentence_chunker = SentenceChunker(min_length=10, max_length=64, max_sentences=1)
 
-    if approach in ['iq', 'st', 'rw']:
-        if approach == 'iq':
+    if compare_approach == 'candidate':
+        if model_type == 'iq':
             candidate_tokenizer = AutoTokenizer.from_pretrained(candidate_model_path)
             candidate_model = AutoModelForSequenceClassification.from_pretrained(candidate_model_path)
             candidate_predictor = CandidatePredictor(candidate_tokenizer, candidate_model, sentence_chunker,
                                                      add_context, max_candidate_length)
-        elif approach == 'st':
+        elif model_type == 'st':
             candidate_model = SentenceTransformer(candidate_model_path)
             candidate_predictor = CandidatePredictorST(drama, candidate_model, sentence_chunker, add_context,
                                                        max_candidate_length)
-        elif approach == 'rw':
+        elif model_type == 'rw':
             candidate_model = SequenceTagger.load(candidate_model_path)
             candidate_predictor = CandidatePredictorRW(candidate_model, sentence_chunker)
 
+        indi_quo = IndiQuo(candidate_predictor, ScenePredictorDummy())
+
+    elif compare_approach == 'scene':
+        candidate_predictor = CandidatePredictorDummy(sentence_chunker)
         scene_model = SentenceTransformer(scene_model_path)
         scene_predictor = ScenePredictor(drama, scene_model, 10)
-
         indi_quo = IndiQuo(candidate_predictor, scene_predictor)
+    elif compare_approach == 'full':
+        candidate_tokenizer = AutoTokenizer.from_pretrained(candidate_model_path)
+        candidate_model = AutoModelForSequenceClassification.from_pretrained(candidate_model_path)
+        candidate_predictor = CandidatePredictor(candidate_tokenizer, candidate_model, sentence_chunker,
+                                                 add_context, max_candidate_length)
+
+        scene_model = SentenceTransformer(scene_model_path)
+        scene_predictor = ScenePredictor(drama, scene_model, 10)
 
-    elif approach == 'sum':
+        indi_quo = IndiQuo(candidate_predictor, scene_predictor)
+    elif compare_approach == 'sum':
         summaries = []
         with open(summaries_file_path, 'r') as summary_file:
             reader = csv.reader(summary_file, delimiter='\t')
@@ -103,17 +120,8 @@ def __run_compare(source_file_path, target_path, candidate_model_path, scene_mod
 
         candidate_model = SentenceTransformer(candidate_model_path)
         candidate_predictor = CandidatePredictorSum(summaries, candidate_model, sentence_chunker)
-
         indi_quo = IndiQuoSum(candidate_predictor)
 
-    elif approach == 'eval':
-        candidate_predictor = CandidatePredictorDummy(sentence_chunker)
-        scene_model = SentenceTransformer(scene_model_path)
-        scene_predictor = ScenePredictor(drama, scene_model, 10)
-        indi_quo = IndiQuo(candidate_predictor, scene_predictor)
-    else:
-        raise Exception(f'Approach {approach} is unknown')
-
     if isfile(target_path) and target_path.endswith('.txt'):
         with open(target_path, 'r', encoding='utf-8') as target_file:
             target_file_content = target_file.read()
@@ -158,9 +166,9 @@ def main(argv=None):
     parser_train_candidate = subparsers_train_model.add_parser('candidate', help=train_candidate_description,
                                                                description=train_candidate_description)
 
-    parser_train_candidate.add_argument('train_folder_path', nargs=1, metavar='train-folder-path',
+    parser_train_candidate.add_argument('train_folder_path', metavar='train-folder-path',
                                         help='Path to the folder with training and validation data')
-    parser_train_candidate.add_argument('output_folder_path', nargs=1, metavar='output-folder-path',
+    parser_train_candidate.add_argument('output_folder_path', metavar='output-folder-path',
                                         help='Path to the output folder of the trained model')
     parser_train_candidate.add_argument('--model', dest='model', default='deepset/gbert-large',
                                         help='Name of the model on huggingface to use as the base model for fine-tuning'
@@ -170,9 +178,9 @@ def main(argv=None):
     parser_train_st = subparsers_train_model.add_parser('candidate_st', help=train_candidate_st_description,
                                                         description=train_candidate_st_description)
 
-    parser_train_st.add_argument('train_folder_path', nargs=1, metavar='train-folder-path',
+    parser_train_st.add_argument('train_folder_path', metavar='train-folder-path',
                                  help='Path to the folder with training and validation data')
-    parser_train_st.add_argument('output_folder_path', nargs=1, metavar='output-folder-path',
+    parser_train_st.add_argument('output_folder_path', metavar='output-folder-path',
                                  help='Path to the output folder of the trained model')
     parser_train_st.add_argument('--model', dest='model', default='deutsche-telekom/gbert-large-paraphrase-cosine',
                                  help='Name of the model on huggingface to use as the base model for fine-tuning'
@@ -181,9 +189,9 @@ def main(argv=None):
     parser_train_scene = subparsers_train_model.add_parser('scene', help=train_scene_description,
                                                            description=train_scene_description)
 
-    parser_train_scene.add_argument('train_folder_path', nargs=1, metavar='train-folder-path',
+    parser_train_scene.add_argument('train_folder_path', metavar='train-folder-path',
                                     help='Path to the folder with training and validation data')
-    parser_train_scene.add_argument('output_folder_path', nargs=1, metavar='output-folder-path',
+    parser_train_scene.add_argument('output_folder_path', metavar='output-folder-path',
                                     help='Path to the input folder')
     parser_train_scene.add_argument('--model', dest='model', default='deutsche-telekom/gbert-large-paraphrase-cosine',
                                     help='Name of the model on huggingface to use as the base model for fine-tuning'
@@ -192,35 +200,68 @@ def main(argv=None):
     parser_compare = subparsers_command.add_parser('compare', help=compare_description,
                                                    description=compare_description)
 
-    parser_compare.add_argument('source_file_path', nargs=1, metavar='source-file-path',
-                                help='Path to the source xml drama file')
-    parser_compare.add_argument('target_path', nargs=1, metavar='target-path',
-                                help='Path to the target text file or folder')
-    parser_compare.add_argument('candidate_model_folder_path', nargs=1, metavar='candidate-model-folder-path',
-                                help='Path to the candidate model folder')
-    parser_compare.add_argument('scene_model_folder_path', nargs=1, metavar='scene-model-folder-path',
-                                help='Path to the scene model folder')
-    parser_compare.add_argument('output_folder_path', nargs=1, metavar='output-folder-path',
-                                help='The output folder path')
-    parser_compare.add_argument('--approach', choices=['st', 'rw', 'iq', 'sum', 'eval'], dest='approach',
-                                default='iq', help='The approach to use for candidate prediction')
-    parser_compare.add_argument('--add-context', dest='add_context', default=True,
+    subparsers_compare_approach = parser_compare.add_subparsers(dest='compare_approach')
+    subparsers_compare_approach.required = True
+
+    cp_all = argparse.ArgumentParser(add_help=False)
+    cp_all.add_argument('source_file_path', metavar='source-file-path', help='Path to the source xml drama file')
+    cp_all.add_argument('target_path', metavar='target-path', help='Path to the target text file or folder')
+
+    cp_candidate_full = argparse.ArgumentParser(add_help=False)
+    cp_candidate_full.add_argument('--add-context', dest='add_context', default=True,
                                 action=BooleanOptionalAction, help='If set, candidates are embedded in context up to'
                                                                    'a total length of --max-candidate-length')
-    parser_compare.add_argument('--max-candidate-length', dest='max_candidate_length', default=128,
+    cp_candidate_full.add_argument('--max-candidate-length', dest='max_candidate_length', default=128,
                                 type=int, help='Maximum length in words of a candidate (default: %(default)d)')
-    parser_compare.add_argument('--summaries-file-path', dest='summaries_file_path', required=False,
+
+    cp_candidate_model = argparse.ArgumentParser(add_help=False)
+    cp_candidate_model.add_argument('candidate_model_folder_path', metavar='candidate-model-folder-path',
+                                    help='Path to the candidate model folder')
+    cp_scene_model = argparse.ArgumentParser(add_help=False)
+    cp_scene_model.add_argument('scene_model_folder_path', metavar='scene-model-folder-path',
+                                help='Path to the scene model folder')
+    cp_output = argparse.ArgumentParser(add_help=False)
+    cp_output.add_argument('output_folder_path', metavar='output-folder-path',
+                           help='The output folder path')
+
+    parser_compare_candidate = (
+        subparsers_compare_approach.add_parser('candidate',
+                                               parents=[cp_all, cp_candidate_model, cp_output, cp_candidate_full],
+                                               help='Candidate prediction only', description='Candidate prediction only')
+
+    )
+    parser_compare_candidate.add_argument('--model-type', choices=['st', 'rw', 'iq'], dest='model_type',
+                                          default='iq', help='The model type to use for candidate prediction')
+
+    parser_compare_scene = (
+        subparsers_compare_approach.add_parser('scene',
+                                               parents=[cp_all, cp_scene_model, cp_output],
+                                               help='Scene prediction only', description='Scene prediction only')
+    )
+
+    parser_compare_full = (
+        subparsers_compare_approach.add_parser('full',
+                                               parents=[cp_all, cp_candidate_model, cp_scene_model, cp_output, cp_candidate_full],
+                                               help='Candidate and scene prediction', description='Candidate and scene prediction')
+    )
+
+    parser_compare_sum = (
+        subparsers_compare_approach.add_parser('sum',
+                                               parents=[cp_all, cp_candidate_model, cp_output],
+                                               help='Summary-based candidate prediction', description='Summary-based candidate prediction')
+    )
+    parser_compare_sum.add_argument('--summaries-file-path', dest='summaries_file_path', required=True,
                                 help='Path to the summaries tsv file. Only used if approach is set to \'sum\'')
 
     args = argument_parser.parse_args(argv)
 
     log_level = args.log_level
-    logging.getLogger().setLevel(logging.getLevelName(log_level))
+    logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
     if args.command == 'train':
         if args.train_model == 'candidate' or args.train_model == 'candidate_st' or args.train_model == 'scene':
-            train_folder_path = args.train_folder_path[0]
-            output_folder_path = args.output_folder_path[0]
+            train_folder_path = args.train_folder_path
+            output_folder_path = args.output_folder_path
             model = args.model
             model_name_repl = model.replace('/', '')
 
@@ -238,18 +279,32 @@ def main(argv=None):
                 __train_scene(train_folder_path, output_folder_path, model)
 
     elif args.command == 'compare':
-        source_file_path = args.source_file_path[0]
-        target_path = args.target_path[0]
-        candidate_model_folder_path = args.candidate_model_folder_path[0]
-        scene_model_folder_path = args.scene_model_folder_path[0]
-        output_folder_path = args.output_folder_path[0]
-        approach = args.approach
-        add_context = args.add_context
-        max_candidate_length = args.max_candidate_length
+        source_file_path = args.source_file_path
+        target_path = args.target_path
+        output_folder_path = args.output_folder_path
 
-        summaries_file_path = None
+        c_appr = args.compare_approach
+
+        candidate_model_folder_path = None
+        if c_appr in ['candidate', 'full', 'sum']:
+            candidate_model_folder_path = args.candidate_model_folder_path
+
+        scene_model_folder_path = None
+        if c_appr in ['scene', 'full']:
+            scene_model_folder_path = args.scene_model_folder_path
 
-        if approach == 'sum':
+        add_context = True
+        max_candidate_length = 128
+        if c_appr in ['candidate', 'full']:
+            add_context = args.add_context
+            max_candidate_length = args.max_candidate_length
+
+        model_type = None
+        if c_appr == 'candidate':
+            model_type = args.model_type
+
+        summaries_file_path = None
+        if c_appr == 'sum':
             summaries_file_path = args.summaries_file_path
 
         now = datetime.now()
@@ -257,8 +312,8 @@ def main(argv=None):
         output_folder_path = join(output_folder_path, date_time_string)
         Path(output_folder_path).mkdir(parents=True, exist_ok=True)
 
-        __run_compare(source_file_path, target_path, candidate_model_folder_path, scene_model_folder_path,
-                      output_folder_path, approach, add_context, max_candidate_length, summaries_file_path)
+        __run_compare(c_appr, model_type, source_file_path, target_path, candidate_model_folder_path, scene_model_folder_path,
+                      output_folder_path, add_context, max_candidate_length, summaries_file_path)
 
 
 if __name__ == '__main__':
diff --git a/indiquo/core/BasePredictor.py b/indiquo/core/BaseCandidatePredictor.py
similarity index 85%
rename from indiquo/core/BasePredictor.py
rename to indiquo/core/BaseCandidatePredictor.py
index 56b437d..6d5467e 100644
--- a/indiquo/core/BasePredictor.py
+++ b/indiquo/core/BaseCandidatePredictor.py
@@ -3,7 +3,7 @@ from typing import List
 from indiquo.core.Candidate import Candidate
 
 
-class BasePredictor(ABC):
+class BaseCandidatePredictor(ABC):
 
     @abstractmethod
     def get_candidates(self, target_text) -> List[Candidate]:
diff --git a/indiquo/core/BaseScenePredictor.py b/indiquo/core/BaseScenePredictor.py
new file mode 100644
index 0000000..9380fc9
--- /dev/null
+++ b/indiquo/core/BaseScenePredictor.py
@@ -0,0 +1,10 @@
+from abc import ABC, abstractmethod
+from typing import List
+from indiquo.core.ScenePrediction import ScenePrediction
+
+
+class BaseScenePredictor(ABC):
+
+    @abstractmethod
+    def predict_scene(self, text) -> List[List[ScenePrediction]]:
+        pass
diff --git a/indiquo/core/CandidatePredictor.py b/indiquo/core/CandidatePredictor.py
index 7a1ad81..6c76927 100644
--- a/indiquo/core/CandidatePredictor.py
+++ b/indiquo/core/CandidatePredictor.py
@@ -1,8 +1,7 @@
 from typing import List
 import re
-from dramatist.drama.Drama import Drama
 
-from indiquo.core.BasePredictor import BasePredictor
+from indiquo.core.BaseCandidatePredictor import BaseCandidatePredictor
 from indiquo.core.Candidate import Candidate
 from indiquo.core.chunker.BaseChunker import BaseChunker
 import torch
@@ -10,7 +9,7 @@ from kpcommons.Footnote import map_to_real_pos, get_footnote_ranges, remove_foot
 
 
 # noinspection PyMethodMayBeStatic
-class CandidatePredictor(BasePredictor):
+class CandidatePredictor(BaseCandidatePredictor):
 
     def __init__(self, tokenizer, model, chunker: BaseChunker, add_context, max_length):
         self.tokenizer = tokenizer
diff --git a/indiquo/core/CandidatePredictorDummy.py b/indiquo/core/CandidatePredictorDummy.py
index 1c2458c..3175ba3 100644
--- a/indiquo/core/CandidatePredictorDummy.py
+++ b/indiquo/core/CandidatePredictorDummy.py
@@ -1,16 +1,13 @@
 from typing import List
-from sentence_transformers import util
-from dramatist.drama.Drama import Drama
 
-from indiquo.core.BasePredictor import BasePredictor
+from indiquo.core.BaseCandidatePredictor import BaseCandidatePredictor
 from indiquo.core.Candidate import Candidate
 from indiquo.core.chunker.BaseChunker import BaseChunker
 from kpcommons.Footnote import map_to_real_pos, get_footnote_ranges, remove_footnotes
-import re
 
 
 # noinspection PyMethodMayBeStatic
-class CandidatePredictorDummy(BasePredictor):
+class CandidatePredictorDummy(BaseCandidatePredictor):
 
     def __init__(self, chunker: BaseChunker):
         self.chunker = chunker
diff --git a/indiquo/core/CandidatePredictorRW.py b/indiquo/core/CandidatePredictorRW.py
index 077fab9..38ccb9f 100644
--- a/indiquo/core/CandidatePredictorRW.py
+++ b/indiquo/core/CandidatePredictorRW.py
@@ -1,13 +1,18 @@
-from typing import List, Optional
-from flair.data import Sentence
-from flair.nn import Model
-from indiquo.core.BasePredictor import BasePredictor
+from typing import List
+
+try:
+    from flair.data import Sentence
+    from flair.nn import Model
+except ModuleNotFoundError:
+    pass
+
+from indiquo.core.BaseCandidatePredictor import BaseCandidatePredictor
 from indiquo.core.Candidate import Candidate
 from indiquo.core.chunker.BaseChunker import BaseChunker
 from kpcommons.Footnote import map_to_real_pos, get_footnote_ranges, remove_footnotes
 
 
-class CandidatePredictorRW(BasePredictor):
+class CandidatePredictorRW(BaseCandidatePredictor):
 
     def __init__(self, model: Model, chunker: BaseChunker):
         self.model = model
diff --git a/indiquo/core/CandidatePredictorST.py b/indiquo/core/CandidatePredictorST.py
index 1312778..2c5ae6f 100644
--- a/indiquo/core/CandidatePredictorST.py
+++ b/indiquo/core/CandidatePredictorST.py
@@ -2,7 +2,7 @@ from typing import List
 from sentence_transformers import util
 from dramatist.drama.Drama import Drama
 
-from indiquo.core.BasePredictor import BasePredictor
+from indiquo.core.BaseCandidatePredictor import BaseCandidatePredictor
 from indiquo.core.Candidate import Candidate
 from indiquo.core.chunker.BaseChunker import BaseChunker
 from kpcommons.Footnote import map_to_real_pos, get_footnote_ranges, remove_footnotes
@@ -10,7 +10,7 @@ import re
 
 
 # noinspection PyMethodMayBeStatic
-class CandidatePredictorST(BasePredictor):
+class CandidatePredictorST(BaseCandidatePredictor):
 
     def __init__(self, drama: Drama, model, chunker: BaseChunker, add_context, max_length):
         self.drama = drama
diff --git a/indiquo/core/IndiQuo.py b/indiquo/core/IndiQuo.py
index 263c79b..9fd9134 100644
--- a/indiquo/core/IndiQuo.py
+++ b/indiquo/core/IndiQuo.py
@@ -1,16 +1,16 @@
 from typing import List
 
-from indiquo.core.BasePredictor import BasePredictor
+from indiquo.core.BaseCandidatePredictor import BaseCandidatePredictor
+from indiquo.core.BaseScenePredictor import BaseScenePredictor
 from indiquo.core.Candidate import Candidate
 from indiquo.core.IndiQuoBase import IndiQuoBase
-from indiquo.core.ScenePredictor import ScenePredictor
 from indiquo.match.Match import Match
 
 
 # noinspection PyMethodMayBeStatic
 class IndiQuo(IndiQuoBase):
 
-    def __init__(self, candidate_predictor: BasePredictor, scene_predictor: ScenePredictor):
+    def __init__(self, candidate_predictor: BaseCandidatePredictor, scene_predictor: BaseScenePredictor):
         self.candidate_predictor = candidate_predictor
         self.scene_predictor = scene_predictor
 
diff --git a/indiquo/core/IndiQuoBase.py b/indiquo/core/IndiQuoBase.py
index 16ed50b..34fa49b 100644
--- a/indiquo/core/IndiQuoBase.py
+++ b/indiquo/core/IndiQuoBase.py
@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from typing import List
-from indiquo.core.Candidate import Candidate
 from indiquo.match.Match import Match
 
 
diff --git a/indiquo/core/IndiQuoSum.py b/indiquo/core/IndiQuoSum.py
index cc6a7ad..3e20165 100644
--- a/indiquo/core/IndiQuoSum.py
+++ b/indiquo/core/IndiQuoSum.py
@@ -1,12 +1,8 @@
 from typing import List
 
-from indiquo.core.BasePredictor import BasePredictor
-from indiquo.core.CandidatePredictor import CandidatePredictor
-from indiquo.core.Candidate import Candidate
 from indiquo.core.CandidatePredictorSum import CandidatePredictorSum
 from indiquo.core.CandidateWithScenes import CandidateWithScenes
 from indiquo.core.IndiQuoBase import IndiQuoBase
-from indiquo.core.ScenePredictor import ScenePredictor
 from indiquo.match.Match import Match
 
 
diff --git a/indiquo/core/ScenePredictor.py b/indiquo/core/ScenePredictor.py
index 7f9c602..299a0e9 100644
--- a/indiquo/core/ScenePredictor.py
+++ b/indiquo/core/ScenePredictor.py
@@ -1,10 +1,11 @@
 from dramatist.drama.Drama import Drama
 from sentence_transformers import util
 
+from indiquo.core.BaseScenePredictor import BaseScenePredictor
 from indiquo.core.ScenePrediction import ScenePrediction
 
 
-class ScenePredictor:
+class ScenePredictor(BaseScenePredictor):
 
     def __init__(self, drama: Drama, model, top_k):
         self.model = model
@@ -23,6 +24,7 @@ class ScenePredictor:
 
         self.source_embeddings = model.encode(source_text_blocks, convert_to_tensor=True)
 
+    # overriding abstract method
     def predict_scene(self, text):
         if isinstance(text, str):
             text = [text]
diff --git a/indiquo/core/ScenePredictorDummy.py b/indiquo/core/ScenePredictorDummy.py
new file mode 100644
index 0000000..1790d89
--- /dev/null
+++ b/indiquo/core/ScenePredictorDummy.py
@@ -0,0 +1,14 @@
+from typing import List
+
+from indiquo.core.BaseScenePredictor import BaseScenePredictor
+from indiquo.core.ScenePrediction import ScenePrediction
+
+
+class ScenePredictorDummy(BaseScenePredictor):
+
+    def predict_scene(self, text) -> List[List[ScenePrediction]]:
+        if isinstance(text, str):
+            text = [text]
+
+        result = [[] for _ in range(len(text))]
+        return result
diff --git a/pyproject.toml b/pyproject.toml
index 4f8cf88..1671a4d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,18 +7,24 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "IndiQuo"
-version = "0.0.1"
+version = "0.1.0"
 authors = [
     { name = "Frederik Arnold", email = "frederik.arnold@hu-berlin.de"}
 ]
 description = ""
 readme = "README.md"
 license = { file="LICENSE" }
-requires-python = ">=3.9"
+requires-python = ">=3.11"
 keywords = ["quotation detection", "quotation identification", "indirect citation extraction",
     "natural language processing", "nlp", "text reuse"]
 dependencies = [
-
+    "sentence-transformers>=3.4.1",
+    "dramatist>=0.0.7",
+    "kpcommons>=0.1.2",
+    "pysbd>=0.3.4",
+    "datasets>=3.4.1",
+    "evaluate>=0.4.3",
+    "accelerate>=1.5.2"
 ]
 
 classifiers = [
diff --git a/requirements.txt b/requirements.txt
index b49196c..e5f77e9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-sentence-transformers~=3.0.1
-dramatist~=0.0.6
-kpcommons~=0.0.3
-pysbd~=0.3.4
-datasets~=2.20.0
-evaluate~=0.4.2
-accelerate~=0.33.0
\ No newline at end of file
+sentence-transformers>=3.4.1
+dramatist>=0.0.7
+kpcommons>=0.1.2
+pysbd>=0.3.4
+datasets>=3.4.1
+evaluate>=0.4.3
+accelerate>=1.5.2
\ No newline at end of file
diff --git a/test/TestSentenceChunker.py b/test/TestSentenceChunker.py
index 45d67ab..6d88a9c 100644
--- a/test/TestSentenceChunker.py
+++ b/test/TestSentenceChunker.py
@@ -21,5 +21,5 @@ class SentenceChunkerTestCase(TestCase):
 
         result = sentence_chunker.chunk(text)
 
-        self.assertEqual(2, len(result))
-        self.assertEqual(24, result[0].end)
+        self.assertEqual(5, len(result))
+        self.assertEqual(22, result[0].end)
-- 
GitLab