Commit 2e0e4b74 authored by Konstantin Schulz

All main entry points (= mcserver) of the REST API are now documented in the OpenAPI specification.

parent 35d4aa7a
Pipeline #11728 passed with stages
in 2 minutes and 40 seconds
......@@ -7,13 +7,11 @@ from mcserver import Config
# Blueprint named "api" and the Api object bound to it; everything below is attached to these two objects.
bp = Blueprint("api", __name__)
api = Api(bp)
# NOTE(review): this is a flattened diff view. The two "from . import" lines, and the TextComplexityAPI
# import/registration further down, look like the before/after variants of one change - confirm
# against the real file which of them is still present.
from . import frequencyAPI
from . import frequencyAPI, textcomplexityAPI
# Resource classes that are registered on the Api object below.
from csm.app.api.annisFindAPI import AnnisFindAPI
from csm.app.api.corpusStorageManagerAPI import CorpusStorageManagerAPI
from csm.app.api.subgraphAPI import SubgraphAPI
from csm.app.api.textcomplexityAPI import TextComplexityAPI
# Each resource is registered under its URI from Config, with an explicit endpoint name.
api.add_resource(AnnisFindAPI, Config.SERVER_URI_ANNIS_FIND, endpoint="find")
api.add_resource(CorpusStorageManagerAPI, Config.SERVER_URI_CSM, endpoint="csm")
api.add_resource(SubgraphAPI, Config.SERVER_URI_CSM_SUBGRAPH, endpoint="subgraph")
api.add_resource(TextComplexityAPI, Config.SERVER_URI_TEXT_COMPLEXITY, endpoint='textcomplexity')
import rapidjson as json
import flask
from flask_restful import Resource
from flask_restful.reqparse import RequestParser
from mcserver.app.models import AnnisResponse, GraphData, TextComplexity
from mcserver.app.models import AnnisResponse, TextComplexity
from mcserver.app.services import NetworkService, CorpusService, TextComplexityService
from openapi.openapi_server.models import TextComplexityForm
class TextComplexityAPI(Resource):
    """REST resource that returns text complexity measures for a given text."""

    def __init__(self):
        """Prepare the request parser with the arguments this resource declares."""
        parser: RequestParser = NetworkService.base_request_parser.copy()
        parser.add_argument('urn', type=str, location="data", required=True, help='No URN provided')
        parser.add_argument('measure', type=str, location="data", required=True, help='No MEASURE provided')
        parser.add_argument('annis_response', type=dict, location="data", required=False,
                            help='No ANNIS response provided')
        self.reqparse: RequestParser = parser
        super().__init__()

    def post(self):
        """Compute the requested complexity measure for the posted URN.

        The raw request body is decoded as UTF-8 JSON. When it contains a non-empty
        "annis_response", that response is used; otherwise the corpus is fetched
        through CorpusService.
        """
        payload: dict = json.loads(flask.request.data.decode("utf-8"))
        urn: str = payload["urn"]
        measure: str = payload["measure"]
        supplied_response: dict = payload.get("annis_response")
        if supplied_response:
            annis_response: AnnisResponse = AnnisResponse.from_dict(supplied_response)
        else:
            annis_response = CorpusService.get_corpus(urn, is_csm=True)
        complexity: TextComplexity = TextComplexityService.text_complexity(
            measure, urn, True, annis_response.graph_data)
        return NetworkService.make_json_response(complexity.to_dict())
def post(complexity_data: dict):
    """Return the text complexity measures requested in the submitted form data.

    The dict is parsed into a TextComplexityForm. If the form carries an ANNIS
    response, it is converted via its dict representation; otherwise the corpus
    for the form's URN is fetched through CorpusService.
    """
    form: TextComplexityForm = TextComplexityForm.from_dict(complexity_data)
    if form.annis_response:
        annis_response: AnnisResponse = AnnisResponse.from_dict(form.annis_response.to_dict())
    else:
        annis_response = CorpusService.get_corpus(form.urn, is_csm=True)
    complexity: TextComplexity = TextComplexityService.text_complexity(
        form.measure, form.urn, True, annis_response.graph_data)
    return NetworkService.make_json_response(complexity.to_dict())
......@@ -22,10 +22,21 @@ paths:
items:
$ref: "../openapi_models.yaml#/components/schemas/FrequencyItem"
parameters:
- name: urn
in: query
description: CTS URN for referencing the corpus.
required: true
schema:
type: string
example: urn:cts:latinLit:phi1254.phi001.perseus-lat2:5.6.21-5.6.21
- $ref: '../openapi_models.yaml#/components/parameters/UrnParam'
# Path item for the text complexity endpoint (POST only).
/textcomplexity:
post:
summary: Gives users measures of text complexity for a given text.
# Dotted path of the Python handler; presumably resolved by the OpenAPI framework - confirm.
operationId: csm.app.api.textcomplexityAPI.post
responses:
200:
description: Text complexity measures for a given text.
content:
application/json:
schema:
$ref: '../openapi_models.yaml#/components/schemas/TextComplexity'
requestBody:
required: true
content:
# NOTE(review): the body is declared as form-urlencoded, while the class-based handler in this
# same diff decoded the raw request body as JSON - confirm that clients send the matching type.
application/x-www-form-urlencoded:
schema:
$ref: '../openapi_models.yaml#/components/schemas/TextComplexityFormBase'
"""The API blueprint. Register it on the main application to enable the REST API for text retrieval."""
from flask import Blueprint
from flask_restful import Api
from mcserver import Config
# Blueprint named "api" and the flask_restful Api object bound to it.
bp = Blueprint("api", __name__)
api = Api(bp)
# NOTE(review): this is a flattened diff view. This "from . import" line and the one at the end of
# the block look like the before/after variants of the same statement - confirm which one remains.
from . import corpusAPI, corpusListAPI, exerciseAPI, exerciseListAPI, fileAPI, frequencyAPI, staticExercisesAPI
# Resource classes that are registered on the Api object below.
from mcserver.app.api.h5pAPI import H5pAPI
from mcserver.app.api.kwicAPI import KwicAPI
from mcserver.app.api.rawTextAPI import RawTextAPI
from mcserver.app.api.textcomplexityAPI import TextComplexityAPI
from mcserver.app.api.validReffAPI import ValidReffAPI
from mcserver.app.api.vectorNetworkAPI import VectorNetworkAPI
from mcserver.app.api.vocabularyAPI import VocabularyAPI
# Each resource is registered under its URI from Config, with an explicit endpoint name.
api.add_resource(H5pAPI, Config.SERVER_URI_H5P, endpoint="h5p")
api.add_resource(KwicAPI, Config.SERVER_URI_KWIC, endpoint="kwic")
api.add_resource(RawTextAPI, Config.SERVER_URI_RAW_TEXT, endpoint="rawtext")
api.add_resource(TextComplexityAPI, Config.SERVER_URI_TEXT_COMPLEXITY, endpoint='textcomplexity')
api.add_resource(ValidReffAPI, Config.SERVER_URI_VALID_REFF, endpoint="validReff")
api.add_resource(VectorNetworkAPI, Config.SERVER_URI_VECTOR_NETWORK, endpoint="vectorNetwork")
api.add_resource(VocabularyAPI, Config.SERVER_URI_VOCABULARY, endpoint="vocabulary")
# Variant that imports every API module of the package, including the ones registered as classes above.
from . import corpusAPI, corpusListAPI, exerciseAPI, exerciseListAPI, fileAPI, frequencyAPI, h5pAPI, kwicAPI, \
rawTextAPI, staticExercisesAPI, textcomplexityAPI, validReffAPI, vectorNetworkAPI, vocabularyAPI
......@@ -12,6 +12,7 @@ from mcserver.app.models import ExerciseType, Solution, ExerciseData, AnnisRespo
from mcserver.app.services import AnnotationService, CorpusService, NetworkService, TextComplexityService
from mcserver.config import Config
from mcserver.models_auto import Exercise, TExercise, UpdateInfo
from openapi.openapi_server.models import ExerciseForm
def adjust_solutions(exercise_data: ExerciseData, exercise_type: str, solutions: List[Solution]) -> List[Solution]:
......@@ -113,19 +114,20 @@ def map_exercise_data_to_database(exercise_data: ExerciseData, exercise_type: st
def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
exercise_type: ExerciseType = ExerciseType(exercise_data["type"])
search_values_list: List[str] = json.loads(exercise_data["search_values"])
ef: ExerciseForm = ExerciseForm.from_dict(exercise_data)
ef.urn = ef.urn if ef.urn else ""
exercise_type: ExerciseType = ExerciseType(ef.type)
search_values_list: List[str] = json.loads(ef.search_values)
aqls: List[str] = AnnotationService.map_search_values_to_aql(search_values_list=search_values_list,
exercise_type=exercise_type)
search_phenomena: List[Phenomenon] = [Phenomenon().__getattribute__(x.split("=")[0].upper()) for x in
search_values_list]
urn: str = exercise_data.get("urn", "")
# if there is custom text instead of a URN, immediately annotate it
conll_string_or_urn: str = urn if CorpusService.is_urn(urn) else AnnotationService.get_udpipe(
CorpusService.get_raw_text(urn, False))
conll_string_or_urn: str = ef.urn if CorpusService.is_urn(ef.urn) else AnnotationService.get_udpipe(
CorpusService.get_raw_text(ef.urn, False))
try:
# construct graph from CONLL data
response: dict = get_graph_data(title=urn, conll_string_or_urn=conll_string_or_urn, aqls=aqls,
response: dict = get_graph_data(title=ef.urn, conll_string_or_urn=conll_string_or_urn, aqls=aqls,
exercise_type=exercise_type, search_phenomena=search_phenomena)
except ValueError:
return connexion.problem(500, Config.ERROR_TITLE_INTERNAL_SERVER_ERROR,
......@@ -133,12 +135,10 @@ def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
solutions_dict_list: List[Dict] = response["solutions"]
solutions: List[Solution] = [Solution.from_dict(x) for x in solutions_dict_list]
ar: AnnisResponse = make_new_exercise(
conll=response["conll"], correct_feedback=exercise_data.get("correct_feedback", ""),
exercise_type=exercise_data["type"], general_feedback=exercise_data.get("general_feedback", ""),
graph_data_raw=response["graph_data_raw"], incorrect_feedback=exercise_data.get("incorrect_feedback", ""),
instructions=exercise_data["instructions"], language=exercise_data.get("language", "de"),
partially_correct_feedback=exercise_data.get("partially_correct_feedback", ""),
search_values=exercise_data["search_values"], solutions=solutions,
type_translation=exercise_data.get("type_translation", ""), urn=urn,
work_author=exercise_data.get("work_author", ""), work_title=exercise_data.get("work_title", ""))
conll=response["conll"], correct_feedback=ef.correct_feedback, exercise_type=ef.type,
general_feedback=ef.general_feedback, graph_data_raw=response["graph_data_raw"],
incorrect_feedback=ef.incorrect_feedback, instructions=ef.instructions, language=ef.language,
partially_correct_feedback=ef.partially_correct_feedback, search_values=ef.search_values,
solutions=solutions, type_translation=ef.type_translation, urn=ef.urn, work_author=ef.work_author,
work_title=ef.work_title)
return NetworkService.make_json_response(ar.to_dict())
import json
from typing import List
from flask_restful import Resource, abort
from flask_restful.reqparse import RequestParser
from typing import List, Union
import connexion
from connexion.lifecycle import ConnexionResponse
from flask import Response
from mcserver import Config
from mcserver.app import db
from mcserver.app.models import Language, ExerciseType, Solution
from mcserver.app.services import TextService, NetworkService
from mcserver.models_auto import Exercise
class H5pAPI(Resource):
    """REST resource that serves JSON templates for interactive H5P exercise layouts."""

    def __init__(self):
        """Declare the request arguments and build the per-instance JSON templates."""
        parser: RequestParser = NetworkService.base_request_parser.copy()
        parser.add_argument("eid", type=str, required=True, default="", help="No exercise ID provided")
        parser.add_argument("lang", type=str, required=True, default="en", help="No language code provided")
        parser.add_argument("solution_indices", type=str, required=False, help="No solution IDs provided")
        self.reqparse: RequestParser = parser
        # format string handed to get_response together with the templates
        self.feedback_template: str = "{0}: @score {1} @total."
        # template handed to get_response as json_template_drag_text
        self.json_template_drag_text: dict = {
            "taskDescription": "<p>{0}</p>\n",
            "checkAnswer": "Prüfen",
            "tryAgain": "Nochmal",
            "showSolution": "Lösung",
            "behaviour": {
                "enableRetry": True,
                "enableSolutionsButton": True,
                "instantFeedback": False,
                "enableCheckButton": True
            },
            "textField": "Blueberries are *blue:Check the name of the berry!*.\nStrawberries are *red*.",
            "overallFeedback": [{"from": 0, "to": 100, "feedback": "Punkte: @score von @total."}],
            "dropZoneIndex": "Drop Zone @index.",
            "empty": "Drop Zone @index is empty.",
            "contains": "Drop Zone @index contains draggable @draggable.",
            "draggableIndex": "Draggable @text. @index of @count draggables.",
            "tipLabel": "Show tip",
            "correctText": "Correct!",
            "incorrectText": "Incorrect!",
            "resetDropTitle": "Reset drop",
            "resetDropDescription": "Are you sure you want to reset this drop zone?",
            "grabbed": "Draggable is grabbed.",
            "cancelledDragging": "Cancelled dragging.",
            "correctAnswer": "Correct answer:",
            "feedbackHeader": "Feedback",
            "scoreBarLabel": "You got :num out of :total points"
        }
        # template handed to get_response as the initial response dict
        self.json_template_mark_words: dict = {
            "checkAnswerButton": "Check",
            "tryAgainButton": "Retry",
            "showSolutionButton": "Show solution",
            "behaviour": {
                "enableRetry": True,
                "enableSolutionsButton": True
            },
            # NOTE(review): "\/" is not a recognised Python escape sequence, so the backslash stays in the string
            "taskDescription": "<p>Click the various types of berries&nbsp;mentioned&nbsp;in the text below!<\/p>\n",
            "textField": "*Bilberries*, also known as *blueberries* are edible, nearly black berries found in nutrient-poor soils.<br><br>*Cloudberries* are edible orange berries similar to *raspberries* or *blackberries* found in alpine and arctic tundra. <br><br>*Redcurrants* are red translucent berries with a diameter of 8\u201310 mm, and are closely related to *blackcurrants*.",
            "overallFeedback": [{"from": 0, "to": 100, "feedback": "You got @score of @total points."}]
        }
        super().__init__()

    def get(self):
        """Look up the requested exercise and answer with the filled H5P JSON template.

        Aborts with 404 when no exercise has the given ID and with 422 when the
        exercise type is not cloze, markWords or matching.
        """
        args = self.reqparse.parse_args()
        eid: str = args["eid"]
        raw_indices = args["solution_indices"]
        # a missing argument is decoded from "null", i.e. it becomes None
        solution_indices: List[int] = json.loads(raw_indices if raw_indices else "null")
        try:
            lang: Language = Language(args["lang"])
        except ValueError:
            # unknown language codes fall back to English
            lang = Language.English
        exercise: Exercise = db.session.query(Exercise).filter_by(eid=eid).first()
        db.session.commit()
        if exercise is None:
            abort(404)
        text_field_content: str = ""
        if exercise.exercise_type in [ExerciseType.cloze.value, ExerciseType.markWords.value]:
            text_field_content = TextService.get_h5p_text_with_solutions(exercise, solution_indices)
        elif exercise.exercise_type == ExerciseType.matching.value:
            matching_solutions: List[Solution] = TextService.get_solutions_by_index(exercise, solution_indices)
            # one "target *value*" line per solution
            text_field_content += "".join(
                "{0} *{1}*\n".format(item.target.content, item.value.content) for item in matching_solutions)
        else:
            abort(422)
        filled_template: dict = get_response(self.json_template_mark_words, lang, self.json_template_drag_text,
                                             exercise, text_field_content, self.feedback_template)
        return NetworkService.make_json_response(filled_template)
def get(eid: str, lang: str, solution_indices: List[int]) -> Union[Response, ConnexionResponse]:
    """The GET method for the H5P REST API. It provides JSON templates for client-side H5P exercise layouts.

    :param eid: ID of the exercise that is looked up in the database.
    :param lang: Language code; values that are not a valid Language fall back to English.
    :param solution_indices: Forwarded to TextService to select the solutions used for the text field.
    :return: JSON response built by get_response, a 404 problem if no exercise has this ID, or a
        422 problem if the exercise type is not cloze, markWords or matching.
    """
    import copy  # function-scope import so that no file-level import has to change

    language: Language
    try:
        language = Language(lang)
    except ValueError:
        language = Language.English
    exercise: Exercise = db.session.query(Exercise).filter_by(eid=eid).first()
    db.session.commit()
    if exercise is None:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    text_field_content: str = ""
    if exercise.exercise_type in [ExerciseType.cloze.value, ExerciseType.markWords.value]:
        text_field_content = TextService.get_h5p_text_with_solutions(exercise, solution_indices)
    elif exercise.exercise_type == ExerciseType.matching.value:
        solutions: List[Solution] = TextService.get_solutions_by_index(exercise, solution_indices)
        # one "target *value*" line per solution
        text_field_content = "".join(
            "{0} *{1}*\n".format(solution.target.content, solution.value.content) for solution in solutions)
    else:
        return connexion.problem(
            422, Config.ERROR_TITLE_UNPROCESSABLE_ENTITY, Config.ERROR_MESSAGE_UNPROCESSABLE_ENTITY)
    # The templates are attributes shared through TextService. get_response receives them and returns
    # the response dict, so it gets private copies: whatever it writes for this request cannot end up
    # in the shared templates and show up in a later request.
    response_dict: dict = get_response(
        copy.deepcopy(TextService.json_template_mark_words), language,
        copy.deepcopy(TextService.json_template_drag_text), exercise, text_field_content,
        TextService.feedback_template)
    return NetworkService.make_json_response(response_dict)
def get_response(response_dict: dict, lang: Language, json_template_drag_text: dict, exercise: Exercise,
text_field_content: str, feedback_template: str):
text_field_content: str, feedback_template: str) -> dict:
# default values for buttons and response
button_dict: dict = {"check": ["checkAnswerButton", "Prüfen" if lang == Language.German else "Check"],
"again": ["tryAgainButton", "Nochmal" if lang == Language.German else "Retry"],
......
......@@ -10,46 +10,29 @@ from typing import List, Dict
import requests
from bs4 import BeautifulSoup, ResultSet, Tag
from conllu import TokenList
from flask_restful import Resource
from flask_restful.reqparse import RequestParser
from flask import Response
from mcserver.app.models import ExerciseType, ExerciseData, LinkMC, NodeMC
from mcserver.app.services import AnnotationService, NetworkService
from mcserver.config import Config
from openapi.openapi_server.models import KwicForm
class KwicAPI(Resource):
    """The KWIC API resource. It gives users example contexts for a given phenomenon in a given corpus."""

    def __init__(self):
        """Initializes possible arguments for calls to the KWIC REST API."""
        self.reqparse: RequestParser = NetworkService.base_request_parser.copy()
        self.reqparse.add_argument("urn", type=str, required=True, default="", location="form", help="No URN provided")
        self.reqparse.add_argument("search_values", type=str, required=True, location="form",
                                   help="No search value(s) provided")
        self.reqparse.add_argument("ctx_left", type=int, required=False, location="form", default=5,
                                   help="No left context size provided")
        # the help text used to say "left" here as well (copy-paste error)
        self.reqparse.add_argument("ctx_right", type=int, required=False, location="form", default=5,
                                   help="No right context size provided")
        super(KwicAPI, self).__init__()

    def post(self) -> object:
        """The POST method for the KWIC REST API. It provides example contexts for a given phenomenon
        in a given corpus.

        :return: JSON response wrapping the concatenated output of handle_exercise_data for every
            item returned by the subgraph service.
        """
        args = self.reqparse.parse_args()
        # search_values arrives as a JSON-encoded list of strings
        search_values_list: List[str] = json.loads(args["search_values"])
        aqls: List[str] = AnnotationService.map_search_values_to_aql(search_values_list, ExerciseType.kwic)
        ctx_left: int = args["ctx_left"]
        ctx_right: int = args["ctx_right"]
        url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}{Config.SERVER_URI_CSM_SUBGRAPH}"
        # the context sizes are sent as strings inside the JSON payload
        data: str = json.dumps(dict(urn=args["urn"], aqls=aqls, ctx_left=str(ctx_left), ctx_right=str(ctx_right)))
        response: requests.Response = requests.post(url, data=data)
        response_content: List[dict] = json.loads(response.text)
        exercise_data_list: List[ExerciseData] = [ExerciseData(json_dict=x) for x in response_content]
        # join once instead of growing the string with += for every item
        ret_val: str = "".join(handle_exercise_data(ed, ctx_left, ctx_right) for ed in exercise_data_list)
        return NetworkService.make_json_response(ret_val)
def post(kwic_data: dict) -> Response:
    """The POST method for the KWIC REST API. It provides example contexts for a given phenomenon
    in a given corpus.

    :param kwic_data: Submitted form data; "urn" is read from it directly, the remaining fields
        are read from the KwicForm parsed out of it.
    :return: JSON response wrapping the concatenated output of handle_exercise_data for every
        item returned by the subgraph service.
    """
    kwic_form: KwicForm = KwicForm.from_dict(kwic_data)
    # search_values arrives as a JSON-encoded list of strings
    search_values_list: List[str] = json.loads(kwic_form.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(search_values_list, ExerciseType.kwic)
    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}{Config.SERVER_URI_CSM_SUBGRAPH}"
    # NOTE(review): the URN is taken from the raw dict while every other field comes from kwic_form;
    # presumably kwic_form.urn holds the same value - confirm before unifying the access.
    data: str = json.dumps(
        dict(urn=kwic_data["urn"], aqls=aqls, ctx_left=str(kwic_form.ctx_left), ctx_right=str(kwic_form.ctx_right)))
    # NOTE(review): no timeout is passed to requests.post - confirm that waiting indefinitely is acceptable.
    response: requests.Response = requests.post(url, data=data)
    response_content: List[dict] = json.loads(response.text)
    exercise_data_list: List[ExerciseData] = [ExerciseData(json_dict=x) for x in response_content]
    # join once instead of growing the string with += for every item
    ret_val: str = "".join(
        handle_exercise_data(ed, kwic_form.ctx_left, kwic_form.ctx_right) for ed in exercise_data_list)
    return NetworkService.make_json_response(ret_val)
def handle_exercise_data(ed: ExerciseData, ctx_left: int, ctx_right: int) -> str:
......
from flask_restful import Resource, abort
from flask_restful.reqparse import RequestParser
from mcserver.app.models import AnnisResponse, TextComplexityMeasure, GraphData
from mcserver.app.services import CorpusService, NetworkService, TextComplexityService
from typing import Union
import connexion
from connexion.lifecycle import ConnexionResponse
from flask import Response
from mcserver import Config
from mcserver.app.models import AnnisResponse, TextComplexityMeasure
from mcserver.app.services import CorpusService, NetworkService, TextComplexityService


class RawTextAPI(Resource):
    """The raw text API resource. It returns the corpus data for a URN together with its text complexity."""

    def __init__(self):
        """Initialize possible arguments for calls to the raw text REST API."""
        parser: RequestParser = NetworkService.base_request_parser.copy()
        parser.add_argument("urn", type=str, required=True, default="", help="No URN provided")
        self.reqparse: RequestParser = parser
        super().__init__()

    def get(self):
        """Fetch the corpus for the requested URN and attach all text complexity measures to it.

        Aborts with 404 when the fetched graph data contains no nodes.
        """
        urn: str = self.reqparse.parse_args()["urn"]
        annis_response: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
        if not annis_response.graph_data.nodes:
            abort(404)
        complexity = TextComplexityService.text_complexity(
            TextComplexityMeasure.all.name, urn, False, annis_response.graph_data)
        annis_response.text_complexity = complexity.to_dict()
        return NetworkService.make_json_response(annis_response.to_dict())
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """Return the corpus data for the requested text passage, including all text complexity measures.

    Answers with a 404 problem when the fetched graph data contains no nodes.
    """
    annis_response: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
    if not annis_response.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, urn, False, annis_response.graph_data)
    annis_response.text_complexity = complexity.to_dict()
    return NetworkService.make_json_response(annis_response.to_dict())
from flask_restful import Resource
from flask_restful.reqparse import RequestParser
from mcserver.app.models import AnnisResponse, GraphData, TextComplexity
from mcserver.app.models import AnnisResponse, TextComplexity
from mcserver.app.services import NetworkService, CorpusService, TextComplexityService
class TextComplexityAPI(Resource):
    """REST resource that returns text complexity measures for a given text."""

    def __init__(self):
        """Declare the "urn" and "measure" request arguments, both required."""
        parser: RequestParser = NetworkService.base_request_parser.copy()
        parser.add_argument('urn', type=str, required=True, help='No URN provided')
        parser.add_argument('measure', type=str, required=True, help='No MEASURE provided')
        self.reqparse: RequestParser = parser
        super().__init__()

    def get(self):
        """Fetch the corpus for the requested URN and return the requested complexity measure as JSON."""
        parsed: dict = self.reqparse.parse_args()
        urn: str = parsed["urn"]
        measure: str = parsed["measure"]
        annis_response: AnnisResponse = CorpusService.get_corpus(urn, is_csm=False)
        complexity: TextComplexity = TextComplexityService.text_complexity(
            measure, urn, False, annis_response.graph_data)
        return NetworkService.make_json_response(complexity.to_dict())
def get(measure: str, urn: str):
    """Fetch the corpus for the given URN and return the requested text complexity measure as JSON."""
    graph_data = CorpusService.get_corpus(urn, is_csm=False).graph_data
    complexity: TextComplexity = TextComplexityService.text_complexity(measure, urn, False, graph_data)
    return NetworkService.make_json_response(complexity.to_dict())
from typing import List
from flask_restful import Resource, abort
from flask_restful.reqparse import RequestParser
from typing import List, Union
import connexion
from connexion.lifecycle import ConnexionResponse
from flask import Response
from mcserver import Config
from mcserver.app.services import CorpusService, NetworkService, CustomCorpusService
class ValidReffAPI(Resource):
    """REST resource that lists the citable text references of a corpus."""

    def __init__(self):
        """Declare the required "urn" request argument."""
        parser: RequestParser = NetworkService.base_request_parser.copy()
        parser.add_argument("urn", type=str, required=True, default="", help="No URN provided")
        self.reqparse: RequestParser = parser
        super().__init__()

    def get(self):
        """Return the references for the requested URN; aborts with 404 when there are none."""
        urn: str = self.reqparse.parse_args()["urn"]
        # custom corpora and standard corpora are resolved by different services
        if CustomCorpusService.is_custom_corpus_urn(urn):
            references: List[str] = CustomCorpusService.get_custom_corpus_reff(urn)
        else:
            references = CorpusService.get_standard_corpus_reff(urn)
        if not references:
            abort(404)
        return NetworkService.make_json_response(references)
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """Return the references for the requested URN, or a 404 problem when there are none."""
    # custom corpora and standard corpora are resolved by different services
    if CustomCorpusService.is_custom_corpus_urn(urn):
        references: List[str] = CustomCorpusService.get_custom_corpus_reff(urn)
    else:
        references = CorpusService.get_standard_corpus_reff(urn)
    if references:
        return NetworkService.make_json_response(references)
    return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
......@@ -2,8 +2,8 @@
import os
import re
from typing import List, Dict, Set, Tuple, Pattern
from flask_restful import Resource
from flask_restful.reqparse import RequestParser
from flask import Response
from gensim import matutils
from gensim.models import Word2Vec
from matplotlib import pyplot
......@@ -12,63 +12,7 @@ from numpy.core.multiarray import ndarray, dot
from mcserver import Config
from mcserver.app.services import NetworkService
class VectorNetworkAPI(Resource):
    """The vector network API resource. It helps to manage network data for the vectors in an AI model."""

    def __init__(self):
        """Initialize possible arguments for calls to the vector network REST API."""
        self.reqparse: RequestParser = NetworkService.base_request_parser.copy()
        self.reqparse.add_argument("search_regex", type=str, required=True,
                                   help="No regular expression provided for the search")
        self.reqparse.add_argument("min_count", type=int, required=False, default=1,
                                   help="No minimum count for word occurrences provided")
        self.reqparse.add_argument("highlight_regex", type=str, required=False, default="",
                                   help="No regular expression provided for highlighting")
        # the help text used to repeat the highlight_regex message (copy-paste error)
        self.reqparse.add_argument("nearest_neighbor_count", type=int, required=False, default=0,
                                   help="No nearest neighbor count provided")
        super(VectorNetworkAPI, self).__init__()

    def get(self):
        """The GET method for the vector network REST API. It provides network data for the vectors in an AI model."""
        args: dict = self.reqparse.parse_args()
        search_regex: str = args["search_regex"]
        min_count: int = args["min_count"]
        highlight_regex: str = args["highlight_regex"]
        nearest_neighbor_count: int = args["nearest_neighbor_count"]
        ret_val: str = get_concept_network(search_regex, min_count, highlight_regex, nearest_neighbor_count)
        return NetworkService.make_json_response(ret_val)

    def post(self):
        """
        The POST method for the vector network REST API. It provides sentences whose content is similar to a given word.

        :return: The tokenized sentences with the highest similarity to the averaged vector of all
            vocabulary entries matching "search_regex", best match first; an empty list when no
            vocabulary entry matches.
        """
        args: dict = self.reqparse.parse_args()
        search_regex_string: str = args["search_regex"]
        # a falsy count (including the parser default 0) is replaced by 10
        nearest_neighbor_count: int = args["nearest_neighbor_count"] or 10
        w2v: Word2Vec = Word2Vec.load(Config.PANEGYRICI_LATINI_MODEL_PATH)
        search_regex: Pattern[str] = re.compile(search_regex_string)
        keys: List[str] = [x for x in w2v.wv.vocab if search_regex.match(x)]
        if not keys:
            # without a match there is no target vector; averaging zero vectors would divide by zero
            return []
        relevant_vectors: List[ndarray] = [w2v.wv.get_vector(x) for x in keys]
        target_vector: ndarray = sum(relevant_vectors) / len(relevant_vectors)
        # close the file deterministically instead of leaving the handle to the garbage collector
        with open(Config.PANEGYRICI_LATINI_TEXT_PATH) as text_file:
            sentences: List[str] = text_file.readlines()
        sentence_vectors: Dict[int, ndarray] = {}
        for i, sentence in enumerate(sentences):
            # split() already drops the trailing newline; slicing off the last character first
            # would truncate the final token of a last line that has no newline
            toks: List[str] = sentence.split()
            if toks:
                vecs: List[ndarray] = [w2v.wv.get_vector(tok) for tok in toks]
                sentence_vectors[i] = sum(vecs) / len(vecs)
        # the normalized target does not change between sentences, so compute it once
        unit_target: ndarray = matutils.unitvec(target_vector)
        sims: List[Tuple[int, ndarray]] = [(key, dot(unit_target, matutils.unitvec(vec)))
                                           for key, vec in sentence_vectors.items()]
        sims.sort(key=lambda x: x[1], reverse=True)
        return [sentences[index].split() for index, _ in sims[:nearest_neighbor_count]]
from openapi.openapi_server.models import VectorNetworkForm
def add_edges(keys: List[str], w2v: Word2Vec, nearest_neighbor_count: int, min_count: int, graph: Graph) -> None:
......@@ -91,6 +35,12 @@ def add_edges(keys: List[str], w2v: Word2Vec, nearest_neighbor_count: int, min_c
graph.add_edge(edge_source, edge_target)
def get(search_regex: str, highlight_regex: str, min_count: int, nearest_neighbor_count: int) -> Response:
    """Return the concept network built by get_concept_network for these parameters as a JSON response."""
    # get_concept_network takes min_count before highlight_regex, unlike this function's signature
    return NetworkService.make_json_response(
        get_concept_network(search_regex, min_count, highlight_regex, nearest_neighbor_count))
def get_concept_network(search_regex_string: str, min_count: int = 1, highlight_regex_string: str = "",
nearest_neighbor_count: int = 0) -> str:
"""Extracts a network of words from vector data in an AI model."""
......@@ -122,3 +72,32 @@ def get_concept_network(search_regex_string: str, min_count: int = 1, highlight_
os.remove(Config.NETWORK_GRAPH_TMP_PATH)