Source code for highlighter.models

from django.db import models
from django.contrib.auth.models import User
from gm2m import GM2MField
#from reversion import revisions as reversion
import reversion
from django.contrib.contenttypes.models import ContentType
import re

from vocabularies.models import VocabNames
#from metainfo.models import Text


##############################################
#
#   Project classes
#
##############################################


class Project(models.Model):
    """Registered project.

    The primary key of a project is used by the JavaScript function to
    target the right endpoints.
    """

    # Name of the registered project.
    name = models.CharField(max_length=255)
    # User who created the project. ``on_delete`` is spelled out because
    # omitting it is deprecated since Django 1.9 and an error from 2.0 on;
    # CASCADE is what the implicit default used to be.
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    description = models.TextField(blank=True, null=True)
    # Optional base URL to restrict highlights to.
    base_url = models.URLField(blank=True, null=True)
    # Whether to store the text in the tool or work with ids only.
    store_text = models.BooleanField(default=False)

    def __str__(self):
        """Return the project name."""
        return self.name
class TextHigh(models.Model):
    """Unstructured text associated with one or many entities/relations."""

    text_choices = (
        ('ft', 'Full Text'),
        ('id', 'Text ID'),
        ('cl', 'Text Class'),
    )

    title = models.CharField(max_length=255, blank=True, null=True)
    text_type = models.CharField(
        max_length=3, choices=text_choices, default='cl')
    # URI of the vocabulary used to identify the text type.
    uri = models.URLField(blank=True, null=True)
    text = models.TextField(blank=True, null=True)
    # UID identifying the text within the project; allows working without
    # uploading the texts themselves.
    text_id = models.PositiveIntegerField(blank=True, null=True)
    text_class = models.CharField(max_length=255, blank=True, null=True)
    # ``on_delete`` is explicit (deprecated to omit since Django 1.9,
    # required from 2.0); CASCADE equals the former implicit default.
    project = models.ForeignKey(
        Project, blank=True, null=True, on_delete=models.CASCADE)

    def __str__(self):
        """Return the title, or the primary key when no title is set.

        ``title`` is nullable and ``__str__`` must return a string, so a
        missing (or empty) title falls back to the primary key.
        """
        return self.title or "{}".format(self.pk)
class AnnotationProject(models.Model):
    """Annotation project belonging to a project.

    Every project can have several annotation projects. They are used to
    track the precision etc. of automatic annotation tools.
    """

    name = models.CharField(max_length=255)
    description = models.TextField(blank=True, null=True)

    def __str__(self):
        """Return the name of the annotation project."""
        return self.name
@reversion.register()
class Annotation(models.Model):
    """Highlight stored for a span of a full text.

    An annotation covers the characters from ``start`` to ``end`` of
    ``text`` and is linked to entities and/or relations via
    ``entity_link``.
    """

    status_choices = (('del', 'deleted'), ('ap', 'approved'))

    # Character offset into ``text.text`` where the highlight starts.
    start = models.PositiveIntegerField()
    # Character offset into ``text.text`` where the highlight ends.
    end = models.PositiveIntegerField()
    # Generic field storing the linked entity / relation objects.
    entity_link = GM2MField(
        'entities.Person',
        'entities.Institution',
        'entities.Place',
        'entities.Event',
        'entities.Work',
        'relations.PersonPerson',
        'relations.PersonPlace',
        'relations.PersonInstitution',
        'relations.PersonEvent',
        'relations.PersonWork',
        'relations.InstitutionPlace',
        'relations.InstitutionEvent',
        'relations.InstitutionWork',
        'relations.InstitutionInstitution',
        'relations.PlaceEvent',
        'relations.PlaceWork',
        'relations.PlacePlace',
        'relations.EventWork',
        'relations.EventEvent',
        'relations.WorkWork'
    )
    entity_candidate = models.ManyToManyField(
        'metainfo.UriCandidate', blank=True)
    # String originally highlighted.
    orig_string = models.CharField(max_length=255, blank=True, null=True)
    # ``on_delete`` is explicit on all foreign keys (deprecated to omit since
    # Django 1.9, required from 2.0); CASCADE equals the former default.
    text = models.ForeignKey('metainfo.Text', on_delete=models.CASCADE)
    # Parent annotations allow for a stacked design of the annotation project.
    parent = models.ForeignKey(
        'self', related_name='parent_annotation', blank=True, null=True,
        on_delete=models.CASCADE)
    # Nullable (changed from ``default=12``).
    user_added = models.ForeignKey(
        User, blank=True, null=True, on_delete=models.CASCADE)
    annotation_project = models.ForeignKey(
        AnnotationProject, blank=True, null=True, on_delete=models.CASCADE)
    status = models.CharField(
        max_length=4, choices=status_choices, blank=True, null=True)

    def __str__(self):
        """Return the primary key as a string."""
        return "{}".format(self.pk)

    def trim_whitespaces(self, char):
        """Skip space characters in the annotated text.

        :param char: offset into ``self.text.text`` to start scanning at.
        :return: the first offset at or after ``char`` that does not hold a
            space; ``char`` advanced to the text length when only spaces
            follow (the offset is always returned, never ``None``).
        """
        content = self.text.text
        # The length is taken from the text itself so the method also works
        # when called on its own.
        length = len(content)
        while char < length and content[char] == ' ':
            char += 1
        return char

    def annotation_hash(self, format_string='start_end_text_ent_entid'):
        """Return a hash of the annotation.

        The hash is used to calculate inter-annotator agreement.

        :param format_string: underscore separated list of the parts to
            include. ``start``, ``end`` and ``text`` are mandatory; ``ent``,
            ``entid`` and ``rel`` add information on the first linked object.
        :return: the requested parts joined by ``_``. Parts describing the
            linked object are ``NONE`` when nothing is linked.
        :raises AttributeError: if ``start``, ``end`` or ``text`` is missing
            from ``format_string``.
        """
        matching = {'start': 'start', 'end': 'end', 'text': 'text_id'}
        f_list = format_string.split('_')
        if not {'start', 'end', 'text'}.issubset(f_list):
            raise AttributeError('Start, end and text must be present in the format_string parameter')
        # Kept for backward compatibility: the attribute was set here before.
        self.text_length = len(self.text.text)
        # NOTE(review): this adjusts start/end on the in-memory instance (as
        # it always did); nothing is saved here.
        self.start = self.trim_whitespaces(self.start)
        self.end = self.trim_whitespaces(self.end)
        linked = None  # linked objects, fetched at most once and only if needed
        parts = []
        for f in f_list:
            if f in matching:
                parts.append(str(getattr(self, matching[f])))
                continue
            if linked is None:
                linked = list(self.entity_link.all())
            if linked:
                parts.append(self._entity_hash_part(linked[0], f))
            else:
                parts.append('NONE')
        return '_'.join(parts)

    @staticmethod
    def _entity_hash_part(entity, token):
        """Return the hash part ``token`` (ent/entid/rel) for ``entity``."""
        if token == 'rel':
            return str(getattr(entity, 'relation_type_id'))
        if token not in ('ent', 'entid'):
            # Unknown tokens contribute an empty part.
            return ''
        # NOTE(review): relies on str(ContentType) being the space separated
        # model name (e.g. "person place") - confirm for the Django version
        # in use.
        cont_lst = str(ContentType.objects.get_for_model(entity)).split()
        if token == 'ent':
            return ''.join(cont_lst)
        used = set()
        ids = []
        for cont in cont_lst:
            found = ''
            # Relations between two objects of the same type carry an A and
            # a B side; remembering what was used makes the second
            # occurrence resolve to B instead of A again.
            for suffix in ('', 'A', 'B'):
                attr = 'related_' + cont + suffix
                if attr not in used and hasattr(entity, attr):
                    used.add(attr)
                    found = str(getattr(entity, attr + '_id'))
                    break
            ids.append(found)
        return '-'.join(ids)

    def get_html_markup(self):
        """Return the opening ``<mark>`` tag for this annotation.

        :return: the tag as string, or ``None`` unless exactly one object is
            linked via ``entity_link``.
        """
        linked = list(self.entity_link.all())
        if len(linked) != 1:
            return None
        rel_entity = linked[0]
        # Relations expose ``relation_type``, entities ``kind``; either may
        # be missing or unset, in which case the kind is left empty.
        kind_obj = getattr(rel_entity, 'relation_type', None)
        if kind_obj is None:
            kind_obj = getattr(rel_entity, 'kind', None)
        entity_kind = str(kind_obj.pk) if kind_obj is not None else ''
        entity_type = type(rel_entity).__name__
        # Resolved from the instance: a lookup by class name alone depends on
        # case handling and is ambiguous if two apps share a model name.
        entity_type_app = ContentType.objects.get_for_model(rel_entity).app_label
        # ``user_added`` is nullable.
        user_added = self.user_added.username[:2] if self.user_added else ''
        ent_lst_pk = []
        if entity_type_app.lower() == 'relations':
            for x in dir(rel_entity):
                c = re.match(r'related_\w+_id', x)
                if c:
                    ent_lst_pk.append(str(getattr(rel_entity, c.group(0))))
        # ``data-user-added`` is quoted like the other attributes so the tag
        # stays well-formed when the value is empty.
        start_span = (
            '<mark class="highlight hl_text_{}" data-hl-type="simple" '
            'data-hl-start="{}" data-hl-end="{}" data-hl-text-id="{}" '
            'data-hl-ann-id="{}" data-entity-class="{}" data-entity-pk="{}" '
            'data-related-entity-pk="{}" data-entity-type="{}" '
            'data-user-added="{}">'
        ).format(
            entity_kind, self.start, self.end, self.text_id, self.pk,
            entity_type, rel_entity.pk, ','.join(ent_lst_pk),
            entity_type_app, user_added)
        return start_span
##############################################
#
#   Configuration Classes
#
##############################################
class VocabularyAPI(models.Model):
    """Information on the endpoints of the vocabularies."""

    method_lst = (('l', 'local'), ('o', 'open skos'))

    name = models.CharField(max_length=255, null=True, blank=True)
    api_endpoint = models.CharField(max_length=255, null=True, blank=True)
    method = models.CharField(max_length=2, choices=method_lst, default='l')

    def __str__(self):
        """Return the name, or the primary key when no name is set.

        ``name`` is nullable and ``__str__`` must return a string, so a
        missing (or empty) name falls back to the primary key.
        """
        return self.name or "{}".format(self.pk)