Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
callidus
Machina Callida
Commits
a6a855f2
Commit
a6a855f2
authored
Aug 05, 2020
by
Konstantin Schulz
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
simple database queries are now handled uniformly in the database service
parent
faca3c34
Pipeline
#12552
passed with stages
in 3 minutes and 2 seconds
Changes
14
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
197 additions
and
158 deletions
+197
-158
mc_backend/mcserver/app/__init__.py
mc_backend/mcserver/app/__init__.py
+6
-5
mc_backend/mcserver/app/api/corpusAPI.py
mc_backend/mcserver/app/api/corpusAPI.py
+6
-6
mc_backend/mcserver/app/api/corpusListAPI.py
mc_backend/mcserver/app/api/corpusListAPI.py
+4
-7
mc_backend/mcserver/app/api/exerciseAPI.py
mc_backend/mcserver/app/api/exerciseAPI.py
+6
-7
mc_backend/mcserver/app/api/exerciseListAPI.py
mc_backend/mcserver/app/api/exerciseListAPI.py
+3
-5
mc_backend/mcserver/app/api/fileAPI.py
mc_backend/mcserver/app/api/fileAPI.py
+3
-3
mc_backend/mcserver/app/api/h5pAPI.py
mc_backend/mcserver/app/api/h5pAPI.py
+4
-6
mc_backend/mcserver/app/services/__init__.py
mc_backend/mcserver/app/services/__init__.py
+3
-2
mc_backend/mcserver/app/services/annotationService.py
mc_backend/mcserver/app/services/annotationService.py
+0
-8
mc_backend/mcserver/app/services/corpusService.py
mc_backend/mcserver/app/services/corpusService.py
+64
-5
mc_backend/mcserver/app/services/databaseService.py
mc_backend/mcserver/app/services/databaseService.py
+26
-88
mc_backend/mcserver/app/services/exerciseService.py
mc_backend/mcserver/app/services/exerciseService.py
+43
-0
mc_backend/mocks.py
mc_backend/mocks.py
+1
-0
mc_backend/tests.py
mc_backend/tests.py
+28
-16
No files found.
mc_backend/mcserver/app/__init__.py
View file @
a6a855f2
...
...
@@ -65,10 +65,11 @@ def full_init(app: Flask, cfg: Type[Config] = Config) -> None:
""" Fully initializes the application, including logging."""
from
mcserver.app.services
import
DatabaseService
DatabaseService
.
init_db_update_info
()
DatabaseService
.
update_exercises
(
is_csm
=
True
)
DatabaseService
.
init_db_corpus
()
from
mcserver.app.services.corpusService
import
CorpusService
CorpusService
.
init_corpora
()
from
mcserver.app.services
import
ExerciseService
ExerciseService
.
update_exercises
(
is_csm
=
True
)
if
not
cfg
.
TESTING
:
from
mcserver.app.services.corpusService
import
CorpusService
CorpusService
.
init_graphannis_logging
()
start_updater
(
app
)
...
...
@@ -130,8 +131,8 @@ def log_exception(sender_app: Flask, exception, **extra):
def
start_updater
(
app
:
Flask
)
->
Thread
:
""" Starts a new Thread for to perform updates in the background. """
from
mcserver.app.services
import
Database
Service
t
=
Thread
(
target
=
Database
Service
.
init_updater
,
args
=
(
app
,))
from
mcserver.app.services
import
Corpus
Service
t
=
Thread
(
target
=
Corpus
Service
.
init_updater
,
args
=
(
app
,))
t
.
daemon
=
True
t
.
start
()
return
t
...
...
mc_backend/mcserver/app/api/corpusAPI.py
View file @
a6a855f2
...
...
@@ -13,8 +13,8 @@ from mcserver.models_auto import Corpus
def
delete
(
cid
:
int
)
->
Union
[
Response
,
ConnexionResponse
]:
"""The DELETE method for the corpus REST API. It deletes metadata for a specific text."""
corpus
:
Corpus
=
db
.
session
.
query
(
Corpus
).
filter_by
(
cid
=
cid
)
.
first
(
)
if
corpus
is
None
:
corpus
:
Corpus
=
DatabaseService
.
query
(
Corpus
,
filter_by
=
dict
(
cid
=
cid
)
,
first
=
True
)
if
not
corpus
:
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_CORPUS_NOT_FOUND
)
db
.
session
.
delete
(
corpus
)
DatabaseService
.
commit
()
...
...
@@ -23,16 +23,16 @@ def delete(cid: int) -> Union[Response, ConnexionResponse]:
def
get
(
cid
:
int
)
->
Union
[
Response
,
ConnexionResponse
]:
"""The GET method for the corpus REST API. It provides metadata for a specific text."""
corpus
:
Corpus
=
db
.
session
.
query
(
Corpus
).
filter_by
(
cid
=
cid
)
.
first
(
)
if
corpus
is
None
:
corpus
:
Corpus
=
DatabaseService
.
query
(
Corpus
,
filter_by
=
dict
(
cid
=
cid
)
,
first
=
True
)
if
not
corpus
:
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_CORPUS_NOT_FOUND
)
return
NetworkService
.
make_json_response
(
corpus
.
to_dict
())
def
patch
(
cid
:
int
,
**
kwargs
)
->
Union
[
Response
,
ConnexionResponse
]:
"""The PUT method for the corpus REST API. It provides updates metadata for a specific text."""
corpus
:
Corpus
=
db
.
session
.
query
(
Corpus
).
filter_by
(
cid
=
cid
)
.
first
(
)
if
corpus
is
None
:
corpus
:
Corpus
=
DatabaseService
.
query
(
Corpus
,
filter_by
=
dict
(
cid
=
cid
)
,
first
=
True
)
if
not
corpus
:
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_CORPUS_NOT_FOUND
)
for
k
,
v
in
kwargs
.
items
():
if
v
is
not
None
:
...
...
mc_backend/mcserver/app/api/corpusListAPI.py
View file @
a6a855f2
...
...
@@ -2,7 +2,6 @@
from
connexion.lifecycle
import
ConnexionResponse
from
flask
import
Response
from
typing
import
List
,
Union
from
mcserver.app
import
db
from
mcserver.app.models
import
ResourceType
from
mcserver.app.services
import
NetworkService
,
DatabaseService
from
mcserver.models_auto
import
Corpus
,
UpdateInfo
...
...
@@ -10,11 +9,9 @@ from mcserver.models_auto import Corpus, UpdateInfo
def
get
(
last_update_time
:
int
)
->
Union
[
Response
,
ConnexionResponse
]:
"""The GET method for the corpus list REST API. It provides metadata for all available texts."""
ui_cts
:
UpdateInfo
ui_cts
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
cts_data
.
name
).
first
()
DatabaseService
.
commit
()
if
ui_cts
.
last_modified_time
>=
last_update_time
/
1000
:
corpora
:
List
[
Corpus
]
=
db
.
session
.
query
(
Corpus
).
all
()
DatabaseService
.
commit
()
ui_cts
:
UpdateInfo
=
DatabaseService
.
query
(
UpdateInfo
,
filter_by
=
dict
(
resource_type
=
ResourceType
.
cts_data
.
name
),
first
=
True
)
if
ui_cts
and
ui_cts
.
last_modified_time
>=
last_update_time
/
1000
:
corpora
:
List
[
Corpus
]
=
DatabaseService
.
query
(
Corpus
)
return
NetworkService
.
make_json_response
([
x
.
to_dict
()
for
x
in
corpora
])
return
NetworkService
.
make_json_response
(
None
)
mc_backend/mcserver/app/api/exerciseAPI.py
View file @
a6a855f2
...
...
@@ -10,7 +10,7 @@ from mcserver.app import db
from
mcserver.app.models
import
ExerciseType
,
Solution
,
ExerciseData
,
AnnisResponse
,
Phenomenon
,
TextComplexity
,
\
TextComplexityMeasure
,
ResourceType
,
ExerciseMC
,
GraphData
from
mcserver.app.services
import
AnnotationService
,
CorpusService
,
NetworkService
,
TextComplexityService
,
\
DatabaseService
DatabaseService
,
ExerciseService
from
mcserver.config
import
Config
from
mcserver.models_auto
import
Exercise
,
TExercise
,
UpdateInfo
from
openapi.openapi_server.models
import
ExerciseForm
...
...
@@ -28,9 +28,8 @@ def adjust_solutions(exercise_data: ExerciseData, exercise_type: str, solutions:
def
get
(
eid
:
str
)
->
Union
[
Response
,
ConnexionResponse
]:
exercise
:
TExercise
=
db
.
session
.
query
(
Exercise
).
filter_by
(
eid
=
eid
).
first
()
DatabaseService
.
commit
()
if
exercise
is
None
:
exercise
:
TExercise
=
DatabaseService
.
query
(
Exercise
,
filter_by
=
dict
(
eid
=
eid
),
first
=
True
)
if
not
exercise
:
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_EXERCISE_NOT_FOUND
)
ar
:
AnnisResponse
=
CorpusService
.
get_corpus
(
cts_urn
=
exercise
.
urn
,
is_csm
=
False
)
if
not
ar
.
graph_data
.
nodes
:
...
...
@@ -67,7 +66,7 @@ def make_new_exercise(conll: str, correct_feedback: str, exercise_type: str, gen
# generate a GUID so we can offer the exercise XML as a file download
xml_guid
=
str
(
uuid
.
uuid4
())
# assemble the mapped exercise data
ed
:
ExerciseData
=
Annotation
Service
.
map_graph_data_to_exercise
(
ed
:
ExerciseData
=
Exercise
Service
.
map_graph_data_to_exercise
(
graph_data_raw
=
graph_data_raw
,
solutions
=
solutions
,
xml_guid
=
xml_guid
)
# for markWords exercises, add the maximum number of correct solutions to the description
instructions
+=
(
f
"(
{
len
(
solutions
)
}
)"
if
exercise_type
==
ExerciseType
.
markWords
.
value
else
""
)
...
...
@@ -107,8 +106,8 @@ def map_exercise_data_to_database(exercise_data: ExerciseData, exercise_type: st
work_title
=
work_title
,
urn
=
urn
)
# add the mapped exercise to the database
db
.
session
.
add
(
new_exercise
)
ui_exercises
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_b
y
(
resource_type
=
ResourceType
.
exercise_list
.
name
)
.
first
(
)
ui_exercises
:
UpdateInfo
=
DatabaseService
.
quer
y
(
UpdateInfo
,
filter_by
=
dict
(
resource_type
=
ResourceType
.
exercise_list
.
name
)
,
first
=
True
)
ui_exercises
.
last_modified_time
=
datetime
.
utcnow
().
timestamp
()
DatabaseService
.
commit
()
return
new_exercise
...
...
mc_backend/mcserver/app/api/exerciseListAPI.py
View file @
a6a855f2
...
...
@@ -13,9 +13,8 @@ from openapi.openapi_server.models import MatchingExercise
def
get
(
lang
:
str
,
frequency_upper_bound
:
int
,
last_update_time
:
int
,
vocabulary
:
str
=
""
):
"""The GET method for the exercise list REST API. It provides metadata for all available exercises."""
vocabulary_set
:
Set
[
str
]
ui_exercises
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
exercise_list
.
name
).
first
()
DatabaseService
.
commit
()
ui_exercises
:
UpdateInfo
=
DatabaseService
.
query
(
UpdateInfo
,
filter_by
=
dict
(
resource_type
=
ResourceType
.
exercise_list
.
name
),
first
=
True
)
if
ui_exercises
.
last_modified_time
<
last_update_time
/
1000
:
return
NetworkService
.
make_json_response
([])
try
:
...
...
@@ -28,8 +27,7 @@ def get(lang: str, frequency_upper_bound: int, last_update_time: int, vocabulary
lang
=
Language
(
lang
)
except
ValueError
:
lang
=
Language
.
English
exercises
:
List
[
Exercise
]
=
db
.
session
.
query
(
Exercise
).
filter_by
(
language
=
lang
.
value
)
DatabaseService
.
commit
()
exercises
:
List
[
Exercise
]
=
DatabaseService
.
query
(
Exercise
,
filter_by
=
dict
(
language
=
lang
.
value
))
matching_exercises
:
List
[
MatchingExercise
]
=
[
MatchingExercise
.
from_dict
(
x
.
to_dict
())
for
x
in
exercises
]
if
len
(
vocabulary_set
):
for
exercise
in
matching_exercises
:
...
...
mc_backend/mcserver/app/api/fileAPI.py
View file @
a6a855f2
...
...
@@ -17,7 +17,8 @@ from mcserver.models_auto import Exercise, UpdateInfo, LearningResult
def
clean_tmp_folder
():
""" Cleans the files directory regularly. """
ui_file
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
file_api_clean
.
name
).
first
()
ui_file
:
UpdateInfo
=
DatabaseService
.
query
(
UpdateInfo
,
filter_by
=
dict
(
resource_type
=
ResourceType
.
file_api_clean
.
name
),
first
=
True
)
ui_datetime
:
datetime
=
datetime
.
fromtimestamp
(
ui_file
.
last_modified_time
)
if
(
datetime
.
utcnow
()
-
ui_datetime
).
total_seconds
()
>
Config
.
INTERVAL_FILE_DELETE
:
for
file
in
[
x
for
x
in
os
.
listdir
(
Config
.
TMP_DIRECTORY
)
if
x
not
in
".gitignore"
]:
...
...
@@ -35,8 +36,7 @@ def clean_tmp_folder():
def
get
(
id
:
str
,
type
:
FileType
,
solution_indices
:
List
[
int
])
->
Union
[
ETagResponseMixin
,
ConnexionResponse
]:
"""The GET method for the file REST API. It provides the URL to download a specific file."""
clean_tmp_folder
()
exercise
:
Exercise
=
db
.
session
.
query
(
Exercise
).
filter_by
(
eid
=
id
).
first
()
DatabaseService
.
commit
()
exercise
:
Exercise
=
DatabaseService
.
query
(
Exercise
,
filter_by
=
dict
(
eid
=
id
),
first
=
True
)
file_name
:
str
=
id
+
"."
+
str
(
type
)
mime_type
:
str
=
MimeType
[
type
].
value
if
exercise
is
None
:
...
...
mc_backend/mcserver/app/api/h5pAPI.py
View file @
a6a855f2
...
...
@@ -27,9 +27,8 @@ def determine_language(lang: str) -> Language:
def
get
(
eid
:
str
,
lang
:
str
,
solution_indices
:
List
[
int
])
->
Union
[
Response
,
ConnexionResponse
]:
""" The GET method for the H5P REST API. It provides JSON templates for client-side H5P exercise layouts. """
language
:
Language
=
determine_language
(
lang
)
exercise
:
Exercise
=
db
.
session
.
query
(
Exercise
).
filter_by
(
eid
=
eid
).
first
()
DatabaseService
.
commit
()
if
exercise
is
None
:
exercise
:
Exercise
=
DatabaseService
.
query
(
Exercise
,
filter_by
=
dict
(
eid
=
eid
),
first
=
True
)
if
not
exercise
:
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_EXERCISE_NOT_FOUND
)
text_field_content
:
str
=
get_text_field_content
(
exercise
,
solution_indices
)
if
not
text_field_content
:
...
...
@@ -106,9 +105,8 @@ def post(h5p_data: dict):
""" The POST method for the H5P REST API. It offers client-side H5P exercises for download as ZIP archives. """
h5p_form
:
H5PForm
=
H5PForm
.
from_dict
(
h5p_data
)
language
:
Language
=
determine_language
(
h5p_form
.
lang
)
exercise
:
Exercise
=
db
.
session
.
query
(
Exercise
).
filter_by
(
eid
=
h5p_form
.
eid
).
first
()
DatabaseService
.
commit
()
if
exercise
is
None
:
exercise
:
Exercise
=
DatabaseService
.
query
(
Exercise
,
filter_by
=
dict
(
eid
=
h5p_form
.
eid
),
first
=
True
)
if
not
exercise
:
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_EXERCISE_NOT_FOUND
)
text_field_content
:
str
=
get_text_field_content
(
exercise
,
h5p_form
.
solution_indices
)
if
not
text_field_content
:
...
...
mc_backend/mcserver/app/services/__init__.py
View file @
a6a855f2
...
...
@@ -4,7 +4,8 @@ from flask import Blueprint
bp
=
Blueprint
(
"services"
,
__name__
)
# the order of imports is important, please don't change it if you don't know what you are doing
# the order of imports is very important, please don't change it if you don't know what you are doing
from
mcserver.app.services.databaseService
import
DatabaseService
from
mcserver.app.services.textService
import
TextService
from
mcserver.app.services.xmlService
import
XMLservice
from
mcserver.app.services.fileService
import
FileService
...
...
@@ -14,4 +15,4 @@ from mcserver.app.services.customCorpusService import CustomCorpusService
from
mcserver.app.services.frequencyService
import
FrequencyService
from
mcserver.app.services.corpusService
import
CorpusService
from
mcserver.app.services.textComplexityService
import
TextComplexityService
from
mcserver.app.services.
databa
seService
import
Databa
seService
from
mcserver.app.services.
exerci
seService
import
Exerci
seService
mc_backend/mcserver/app/services/annotationService.py
View file @
a6a855f2
...
...
@@ -239,14 +239,6 @@ class AnnotationService:
AnnotationService
.
sort_nodes
(
graph_data
)
return
graph_data
@
staticmethod
def
map_graph_data_to_exercise
(
graph_data_raw
:
Dict
,
xml_guid
:
str
,
solutions
:
List
[
Solution
]):
""" Creates an ExerciseData object from the separate parts. """
# create the basis for the download URL
xml_url
=
"/"
+
xml_guid
graph_data
:
GraphData
=
AnnotationService
.
map_graph_data
(
graph_data_raw
)
return
ExerciseData
(
graph
=
graph_data
,
solutions
=
solutions
,
uri
=
xml_url
)
@
staticmethod
def
map_node
(
node
:
dict
):
""" Maps a node dictionary to the native NodeMC class. """
...
...
mc_backend/mcserver/app/services/corpusService.py
View file @
a6a855f2
import
sys
from
datetime
import
datetime
import
rapidjson
as
json
import
os
from
typing
import
List
,
Union
,
Set
,
Tuple
from
typing
import
List
,
Union
,
Set
,
Tuple
,
Dict
import
requests
from
MyCapytain.retrievers.cts5
import
HttpCtsRetriever
from
conllu
import
TokenList
from
flask
import
Flask
from
graphannis
import
CAPI
,
ffi
from
graphannis.cs
import
ResultOrder
from
graphannis.errors
import
consume_errors
,
NoSuchCorpus
,
GraphANNISException
...
...
@@ -13,13 +15,14 @@ from lxml import etree
from
networkx
import
graph
,
MultiDiGraph
from
networkx.readwrite
import
json_graph
from
requests
import
HTTPError
from
sqlalchemy.exc
import
OperationalError
from
mcserver.app
import
db
from
mcserver.app.models
import
CitationLevel
,
GraphData
,
Solution
,
ExerciseType
,
Phenomenon
,
AnnisResponse
,
CorpusMC
,
\
make_solution_element_from_salt_id
,
FrequencyItem
make_solution_element_from_salt_id
,
FrequencyItem
,
ResourceType
from
mcserver.app.services
import
AnnotationService
,
XMLservice
,
TextService
,
FileService
,
FrequencyService
,
\
CustomCorpusService
CustomCorpusService
,
DatabaseService
from
mcserver.config
import
Config
from
mcserver.models_auto
import
Corpus
from
mcserver.models_auto
import
Corpus
,
UpdateInfo
class
CorpusService
:
...
...
@@ -51,6 +54,24 @@ class CorpusService:
new_corpus
.
uri
=
"/{0}"
.
format
(
new_corpus
.
cid
)
db
.
session
.
commit
()
@
staticmethod
def
check_corpus_list_age
(
app
:
Flask
)
->
None
:
""" Checks whether the corpus list needs to be updated. If yes, it performs the update. """
app
.
logger
.
info
(
"Corpus update started."
)
ui_cts
:
UpdateInfo
=
DatabaseService
.
query
(
UpdateInfo
,
filter_by
=
dict
(
resource_type
=
ResourceType
.
cts_data
.
name
),
first
=
True
)
DatabaseService
.
commit
()
if
ui_cts
is
None
:
app
.
logger
.
info
(
"UpdateInfo not available!"
)
return
else
:
ui_datetime
:
datetime
=
datetime
.
fromtimestamp
(
ui_cts
.
last_modified_time
)
if
(
datetime
.
utcnow
()
-
ui_datetime
).
total_seconds
()
>
Config
.
INTERVAL_CORPUS_UPDATE
:
CorpusService
.
update_corpora
()
ui_cts
.
last_modified_time
=
datetime
.
utcnow
().
timestamp
()
DatabaseService
.
commit
()
app
.
logger
.
info
(
"Corpus update completed."
)
@
staticmethod
def
find_matches
(
urn
:
str
,
aql
:
str
,
is_csm
:
bool
=
False
)
->
List
[
str
]:
""" Finds matches for a given URN and AQL and returns the corresponding node IDs. """
...
...
@@ -281,6 +302,30 @@ class CorpusService:
ctx_left
=
ctx_left
,
ctx_right
=
ctx_right
))
return
AnnisResponse
.
from_dict
(
json
.
loads
(
response
.
text
))
@
staticmethod
def
init_corpora
()
->
None
:
"""Initializes the corpus list if it is not already there and up to date."""
if
DatabaseService
.
has_table
(
Config
.
DATABASE_TABLE_CORPUS
):
CorpusService
.
existing_corpora
=
DatabaseService
.
query
(
Corpus
)
urn_dict
:
Dict
[
str
,
int
]
=
{
v
.
source_urn
:
i
for
i
,
v
in
enumerate
(
CorpusService
.
existing_corpora
)}
for
cc
in
CustomCorpusService
.
custom_corpora
:
if
cc
.
corpus
.
source_urn
in
urn_dict
:
existing_corpus
:
Corpus
=
CorpusService
.
existing_corpora
[
urn_dict
[
cc
.
corpus
.
source_urn
]]
CorpusService
.
update_corpus
(
title_value
=
cc
.
corpus
.
title
,
urn
=
cc
.
corpus
.
source_urn
,
author
=
cc
.
corpus
.
author
,
corpus_to_update
=
existing_corpus
,
citation_levels
=
[
cc
.
corpus
.
citation_level_1
,
cc
.
corpus
.
citation_level_2
,
cc
.
corpus
.
citation_level_3
])
else
:
citation_levels
:
List
[
CitationLevel
]
=
[]
for
cl
in
[
cc
.
corpus
.
citation_level_1
,
cc
.
corpus
.
citation_level_2
,
cc
.
corpus
.
citation_level_3
]:
citation_levels
+=
[
cl
]
if
cl
!=
CitationLevel
.
default
else
[]
CorpusService
.
add_corpus
(
title_value
=
cc
.
corpus
.
title
,
urn
=
cc
.
corpus
.
source_urn
,
group_name_value
=
cc
.
corpus
.
author
,
citation_levels
=
citation_levels
)
CorpusService
.
existing_corpora
=
DatabaseService
.
query
(
Corpus
)
@
staticmethod
def
init_graphannis_logging
()
->
None
:
"""Initializes logging for the graphannis backend."""
...
...
@@ -289,6 +334,21 @@ class CorpusService:
err
)
# Debug
consume_errors
(
err
)
@
staticmethod
def
init_updater
(
app
:
Flask
)
->
None
:
"""Initializes a thread that regularly performs updates."""
app
.
app_context
().
push
()
while
True
:
try
:
CorpusService
.
check_corpus_list_age
(
app
)
except
OperationalError
:
pass
import
gc
gc
.
collect
()
from
time
import
sleep
# sleep for 1 hour
sleep
(
Config
.
INTERVAL_CORPUS_AGE_CHECK
)
@
staticmethod
def
is_urn
(
maybe_urn
:
str
):
""" Checks if the string represents a URN. """
...
...
@@ -338,7 +398,6 @@ class CorpusService:
def
update_corpora
():
"""Checks the remote repositories for new corpora to be included in our database."""
CorpusService
.
existing_corpora
=
db
.
session
.
query
(
Corpus
).
all
()
db
.
session
.
commit
()
resolver
:
HttpCtsRetriever
=
HttpCtsRetriever
(
Config
.
CTS_API_BASE_URL
)
# check the appropriate literature for the desired author
resp
:
str
=
resolver
.
getCapabilities
(
urn
=
"urn:cts:latinLit"
)
# "urn:cts:greekLit" for Greek
...
...
mc_backend/mcserver/app/services/databaseService.py
View file @
a6a855f2
from
datetime
import
datetime
from
typing
import
List
,
Dict
from
flask
import
Flask
from
typing
import
Union
,
Any
from
flask_migrate
import
stamp
,
upgrade
import
rapidjson
as
json
from
sqlalchemy.exc
import
OperationalError
,
InvalidRequestError
from
sqlalchemy.orm
import
Query
from
mcserver.app
import
db
from
mcserver.app.models
import
CitationLevel
,
ResourceType
,
TextComplexityMeasure
,
AnnisResponse
,
TextComplexity
from
mcserver.app.services
import
CorpusService
,
CustomCorpusService
,
TextComplexityService
from
mcserver.app.models
import
ResourceType
from
mcserver.config
import
Config
from
mcserver.models_auto
import
Corpus
,
Exercise
,
UpdateInfo
from
mcserver.models_auto
import
Corpus
,
Exercise
,
UpdateInfo
,
LearningResult
class
DatabaseService
:
@
staticmethod
def
check_corpus_list_age
(
app
:
Flask
)
->
None
:
""" Checks whether the corpus list needs to be updated. If yes, it performs the update. """
app
.
logger
.
info
(
"Corpus update started."
)
ui_cts
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
cts_data
.
name
).
first
()
DatabaseService
.
commit
()
if
ui_cts
is
None
:
app
.
logger
.
info
(
"UpdateInfo not available!"
)
return
else
:
ui_datetime
:
datetime
=
datetime
.
fromtimestamp
(
ui_cts
.
last_modified_time
)
if
(
datetime
.
utcnow
()
-
ui_datetime
).
total_seconds
()
>
Config
.
INTERVAL_CORPUS_UPDATE
:
CorpusService
.
update_corpora
()
ui_cts
.
last_modified_time
=
datetime
.
utcnow
().
timestamp
()
DatabaseService
.
commit
()
app
.
logger
.
info
(
"Corpus update completed."
)
@
staticmethod
def
commit
():
"""Commits the last action to the database and, if it fails, rolls back the current session."""
...
...
@@ -41,43 +19,25 @@ class DatabaseService:
db
.
session
.
rollback
()
raise
@
staticmethod
def
has_table
(
table
:
str
)
->
bool
:
"""Checks if a table is present in the database or not."""
return
db
.
engine
.
dialect
.
has_table
(
db
.
engine
,
table
)
@
staticmethod
def
init_db_alembic
()
->
None
:
"""In Docker, the alembic version is not initially written to the database, so we need to set it manually."""
if
not
db
.
engine
.
dialect
.
has_table
(
db
.
engine
,
Config
.
DATABASE_TABLE_ALEMBIC
):
if
not
DatabaseService
.
has_table
(
Config
.
DATABASE_TABLE_ALEMBIC
):
stamp
(
directory
=
Config
.
MIGRATIONS_DIRECTORY
)
upgrade
(
directory
=
Config
.
MIGRATIONS_DIRECTORY
)
@
staticmethod
def
init_db_corpus
()
->
None
:
"""Initializes the corpus list if it is not already there and up to date."""
if
db
.
engine
.
dialect
.
has_table
(
db
.
engine
,
Config
.
DATABASE_TABLE_CORPUS
):
CorpusService
.
existing_corpora
=
db
.
session
.
query
(
Corpus
).
all
()
DatabaseService
.
commit
()
urn_dict
:
Dict
[
str
,
int
]
=
{
v
.
source_urn
:
i
for
i
,
v
in
enumerate
(
CorpusService
.
existing_corpora
)}
for
cc
in
CustomCorpusService
.
custom_corpora
:
if
cc
.
corpus
.
source_urn
in
urn_dict
:
existing_corpus
:
Corpus
=
CorpusService
.
existing_corpora
[
urn_dict
[
cc
.
corpus
.
source_urn
]]
CorpusService
.
update_corpus
(
title_value
=
cc
.
corpus
.
title
,
urn
=
cc
.
corpus
.
source_urn
,
author
=
cc
.
corpus
.
author
,
corpus_to_update
=
existing_corpus
,
citation_levels
=
[
cc
.
corpus
.
citation_level_1
,
cc
.
corpus
.
citation_level_2
,
cc
.
corpus
.
citation_level_3
])
else
:
citation_levels
:
List
[
CitationLevel
]
=
[]
for
cl
in
[
cc
.
corpus
.
citation_level_1
,
cc
.
corpus
.
citation_level_2
,
cc
.
corpus
.
citation_level_3
]:
citation_levels
+=
[
cl
]
if
cl
!=
CitationLevel
.
default
else
[]
CorpusService
.
add_corpus
(
title_value
=
cc
.
corpus
.
title
,
urn
=
cc
.
corpus
.
source_urn
,
group_name_value
=
cc
.
corpus
.
author
,
citation_levels
=
citation_levels
)
CorpusService
.
existing_corpora
=
db
.
session
.
query
(
Corpus
).
all
()
DatabaseService
.
commit
()
@
staticmethod
def
init_db_update_info
()
->
None
:
"""Initializes update entries for all resources that have not yet been created."""
if
db
.
engine
.
dialect
.
has_table
(
db
.
engine
,
Config
.
DATABASE_TABLE_UPDATEINFO
):
if
DatabaseService
.
has_table
(
Config
.
DATABASE_TABLE_UPDATEINFO
):
for
rt
in
ResourceType
:
ui_cts
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
rt
.
name
).
first
()
ui_cts
:
UpdateInfo
=
DatabaseService
.
query
(
UpdateInfo
,
filter_by
=
dict
(
resource_type
=
rt
.
name
),
first
=
True
)
if
ui_cts
is
None
:
ui_cts
=
UpdateInfo
.
from_dict
(
resource_type
=
rt
.
name
,
last_modified_time
=
1
,
created_time
=
datetime
.
utcnow
().
timestamp
())
...
...
@@ -85,38 +45,16 @@ class DatabaseService:
DatabaseService
.
commit
()
@
staticmethod
def
init_updater
(
app
:
Flask
)
->
None
:
"""Initializes a thread that regularly performs updates."""
app
.
app_context
().
push
()
while
True
:
try
:
DatabaseService
.
check_corpus_list_age
(
app
)
except
OperationalError
:
pass
import
gc
gc
.
collect
()
from
time
import
sleep
# sleep for 1 hour
sleep
(
Config
.
INTERVAL_CORPUS_AGE_CHECK
)
@
staticmethod
def
update_exercises
(
is_csm
:
bool
)
->
None
:
"""Deletes old exercises."""
if
db
.
engine
.
dialect
.
has_table
(
db
.
engine
,
Config
.
DATABASE_TABLE_EXERCISE
):
exercises
:
List
[
Exercise
]
=
db
.
session
.
query
(
Exercise
).
all
()
now
:
datetime
=
datetime
.
utcnow
()
for
exercise
in
exercises
:
exercise_datetime
:
datetime
=
datetime
.
fromtimestamp
(
exercise
.
last_access_time
)
# delete exercises that have not been accessed for a while, are not compatible anymore, or contain
# corrupted / empty data
if
(
now
-
exercise_datetime
).
total_seconds
()
>
Config
.
INTERVAL_EXERCISE_DELETE
or
\
not
exercise
.
urn
or
not
json
.
loads
(
exercise
.
solutions
):
db
.
session
.
delete
(
exercise
)
DatabaseService
.
commit
()
# manually add text complexity measures for old exercises
elif
not
exercise
.
text_complexity
:
ar
:
AnnisResponse
=
CorpusService
.
get_corpus
(
exercise
.
urn
,
is_csm
=
is_csm
)
tc
:
TextComplexity
=
TextComplexityService
.
text_complexity
(
TextComplexityMeasure
.
all
.
name
,
exercise
.
urn
,
is_csm
,
ar
.
graph_data
)
exercise
.
text_complexity
=
tc
.
all
DatabaseService
.
commit
()
def
query
(
table
:
Union
[
Corpus
,
Exercise
,
LearningResult
,
UpdateInfo
],
filter_by
:
dict
=
None
,
first
:
bool
=
False
)
->
Any
:
"""Executes a query on the database and rolls back the session if errors occur."""
try
:
ret_val
:
Query
=
db
.
session
.
query
(
table
)
if
filter_by
:
ret_val
=
ret_val
.
filter_by
(
**
filter_by
)
ret_val
=
ret_val
.
first
()
if
first
else
ret_val
.
all
()
DatabaseService
.
commit
()
return
ret_val
except
InvalidRequestError
:
db
.
session
.
rollback
()
return
None
mc_backend/mcserver/app/services/exerciseService.py
0 → 100644
View file @
a6a855f2
from
datetime
import
datetime
from
typing
import
List
,
Dict
import
rapidjson
as
json
from
mcserver
import
Config
from
mcserver.app
import
db
from
mcserver.app.models
import
AnnisResponse
,
TextComplexity
,
TextComplexityMeasure
,
GraphData
,
ExerciseData
from
mcserver.app.services
import
DatabaseService
,
CorpusService
,
TextComplexityService
,
AnnotationService
from
mcserver.models_auto
import
Exercise
from
openapi.openapi_server.models
import
Solution
class
ExerciseService
:
""" Service for creating new and managing old exercises. """
@
staticmethod
def
map_graph_data_to_exercise
(
graph_data_raw
:
Dict
,
xml_guid
:
str
,
solutions
:
List
[
Solution
]):
""" Creates an ExerciseData object from the separate parts. """
# create the basis for the download URL
xml_url
=
"/"
+
xml_guid
graph_data
:
GraphData
=
AnnotationService
.
map_graph_data
(
graph_data_raw
)
return
ExerciseData
(
graph
=
graph_data
,
solutions
=
solutions
,
uri
=
xml_url
)
@
staticmethod
def
update_exercises
(
is_csm
:
bool
)
->
None
:
"""Deletes old exercises."""
if
DatabaseService
.
has_table
(
Config
.
DATABASE_TABLE_EXERCISE
):
exercises
:
List
[
Exercise
]
=
DatabaseService
.
query
(
Exercise
)
now
:
datetime
=
datetime
.
utcnow
()
for
exercise
in
exercises
:
exercise_datetime
:
datetime
=
datetime
.
fromtimestamp
(
exercise
.
last_access_time
)
# delete exercises that have not been accessed for a while, are not compatible anymore, or contain
# corrupted / empty data
if
(
now
-
exercise_datetime
).
total_seconds
()
>
Config
.
INTERVAL_EXERCISE_DELETE
or
\
not
exercise
.
urn
or
not
json
.
loads
(
exercise
.
solutions
):
db
.
session
.
delete
(
exercise
)
DatabaseService
.
commit
()
# manually add text complexity measures for old exercises
elif
not
exercise
.
text_complexity
:
ar
:
AnnisResponse
=
CorpusService
.
get_corpus
(
exercise
.
urn
,
is_csm
=
is_csm
)
tc
:
TextComplexity
=
TextComplexityService
.
text_complexity
(
TextComplexityMeasure
.
all
.
name
,
exercise
.
urn
,
is_csm
,
ar
.
graph_data
)
exercise
.
text_complexity
=
tc
.
all
DatabaseService
.
commit
()
mc_backend/mocks.py
View file @
a6a855f2
...
...
@@ -38,6 +38,7 @@ class MockQuery:
self
.
ui
:
UpdateInfo
=
ui
def
all
(
self
):
# DO NOT MAKE THIS POINT TO THE DATABASE SERVICE, IT WILL BE MOCKED ANYWAY
return
db
.
session
.
query
(
Corpus
).
all
()
def
filter_by
(
self
,
**
kwargs
):
...
...
mc_backend/tests.py
View file @
a6a855f2
...
...
@@ -26,7 +26,7 @@ from gensim.models import Word2Vec
from
lxml
import
etree
from
networkx
import
MultiDiGraph
,
Graph
from
requests
import
HTTPError
from
sqlalchemy.exc
import
OperationalError
from
sqlalchemy.exc
import
OperationalError
,
InvalidRequestError
from
sqlalchemy.orm
import
session
from
werkzeug.wrappers
import
Response
...
...
@@ -40,7 +40,7 @@ from mcserver.app.models import ResourceType, FileType, ExerciseType, ExerciseDa
VocabularyCorpus
,
TextComplexityMeasure
,
CitationLevel
,
FrequencyItem
,
TextComplexity
,
Dependency
,
PartOfSpeech
,
\
Choice
,
XapiStatement
,
ExerciseMC
,
CorpusMC
,
make_solution_element_from_salt_id
,
Sentence
from
mcserver.app.services
import
AnnotationService
,
CorpusService
,
FileService
,
CustomCorpusService
,
DatabaseService
,
\
XMLservice
,
TextService
,
FrequencyService
XMLservice
,
TextService
,
FrequencyService
,
ExerciseService
from
mcserver.config
import
TestingConfig
,
Config
from
mcserver.models_auto
import
Corpus
,
Exercise
,
UpdateInfo
,
LearningResult
from
mocks
import
Mocks
,
MockResponse
,
MockW2V
,
MockQuery
,
TestHelper
...
...
@@ -140,14 +140,14 @@ class McTestCase(unittest.TestCase):
ui
:
UpdateInfo
=
UpdateInfo
.
from_dict
(
resource_type
=
ResourceType
.
cts_data
.
name
,
last_modified_time
=
lmt
.
timestamp
(),
created_time
=
1
)
mock
.
session
.
query
.
return_value
=
MockQuery
(
ui
)
response
:
Response
=
Mocks
.
app_dict
[
self
.
class_name
].
client
.
get
(
TestingConfig
.
SERVER_URI_CORPORA
,
query_string
=
dict
(
last_update_time
=
lut
))
response
:
Response
=
Mocks
.
app_dict
[
self
.
class_name
].
client
.
get
(
TestingConfig
.
SERVER_URI_CORPORA
,
query_string
=
dict
(
last_update_time
=
lut
))
data_json
=
json
.
loads
(
response
.
get_data
())
if
data_json
:
result
=
[
x
.
to_dict
()
for
x
in
result
]
self
.
assertEqual
(
data_json
,
result
)
with
patch
.
object
(
mcserver
.
app
.
api
.
corpusListAPI
,
"db"