Source code for metainfo.models

from django.db import models
#from reversion import revisions as reversion
import reversion
from django.db.models.signals import post_save, m2m_changed
from django.dispatch import receiver
from django.contrib.auth.models import Group

from vocabularies.models import CollectionType, TextType
from highlighter.models import Annotation
from .validators import date_validator

from datetime import datetime
import re
import unicodedata
from difflib import SequenceMatcher
#from helper_functions.highlighter import highlight_text


@reversion.register()
class TempEntityClass(models.Model):
    """Base class that binds common attributes to many classes.

    The common attributes are:

    * written start and end dates (free text entered by the user),
    * recognized start and end dates, derived by regular expressions
      from the written dates whenever the object is saved,
    * a ``review`` boolean field to mark an object as reviewed.
    """

    name = models.CharField(max_length=255, blank=True)
    review = models.BooleanField(
        default=False,
        help_text="Should be set to True, if the data record holds up quality standards.")
    # Parsed counterparts of the *_written fields; overwritten on every save().
    start_date = models.DateField(blank=True, null=True)
    end_date = models.DateField(blank=True, null=True)
    start_date_written = models.CharField(
        max_length=255, blank=True, null=True,
        validators=[date_validator, ], verbose_name="Start",
        help_text="Please enter a date (DD).(MM).YYYY")
    end_date_written = models.CharField(
        max_length=255, blank=True, null=True,
        validators=[date_validator, ], verbose_name="End",
        help_text="Please enter a date (DD).(MM).YYYY")
    text = models.ManyToManyField("Text", blank=True)
    collection = models.ManyToManyField("Collection")
    status = models.CharField(max_length=100)
    source = models.ForeignKey('Source', blank=True, null=True)
    references = models.TextField(blank=True, null=True)
    notes = models.TextField(blank=True, null=True)

    def __str__(self):
        """Return the name, or an ``(ID: n)`` placeholder if the name is empty."""
        if self.name != "":  # relations usually don't have names
            return self.name
        else:
            return "(ID: {})".format(self.id)

    def save(self, *args, **kwargs):
        """Parse the written dates into date values, then save the object.

        ``start_date``/``end_date`` are always recomputed from
        ``start_date_written``/``end_date_written``; when a written date is
        empty both the written and the parsed value are set to ``None``.
        A non-empty ``name`` is normalized to Unicode NFC.

        :return: the saved instance itself.
        """

        def match_date(date):
            """Parse a written date.

            Accepted forms (``-`` is treated like ``.``): ``YYYY``,
            ``D.M.YYYY``, ``YYYY...``, ``YYYY.M..`` and ``YYYY.M.D``.

            :return: tuple ``(parsed, written)`` where ``parsed`` is a
                ``datetime`` or ``None`` and ``written`` is the normalized
                text (``None`` for blank input). Text that matches no
                pattern, or matches one but is not a real calendar date,
                is returned unchanged with ``parsed`` set to ``None``.
            """
            date = date.strip().replace('-', '.')
            if not date:
                return None, None
            try:
                if re.match(r'[0-9]{4}$', date):
                    return datetime.strptime(date, '%Y'), date
                if re.match(r'[0-9]{1,2}\.[0-9]{1,2}\.[0-9]{4}$', date):
                    return datetime.strptime(date, '%d.%m.%Y'), date
                found = re.match(r'([0-9]{4})\.\.\.$', date)
                if found:
                    return datetime.strptime(date, '%Y...'), found.group(1)
                found = re.match(r'([0-9]{4})\.([0-9]{1,2})\.\.$', date)
                if found:
                    written = found.group(2) + '.' + found.group(1)
                    return datetime.strptime(date, '%Y.%m..'), written
                found = re.match(
                    r'([0-9]{4})\.([0-9]{1,2})\.([0-9]{1,2})$', date)
                if found:
                    written = '.'.join(found.group(3, 2, 1))
                    return datetime.strptime(date, '%Y.%m.%d'), written
            except ValueError:
                # The text looks like a date but is not a valid one
                # (e.g. 31.02.2000): keep the text, store no parsed date
                # instead of letting save() crash.
                return None, date
            return None, date

        if self.start_date_written:
            self.start_date, self.start_date_written = match_date(
                self.start_date_written)
        else:
            self.start_date = self.start_date_written = None
        if self.end_date_written:
            self.end_date, self.end_date_written = match_date(
                self.end_date_written)
        else:
            self.end_date = self.end_date_written = None
        if self.name:
            self.name = unicodedata.normalize('NFC', self.name)
        super(TempEntityClass, self).save(*args, **kwargs)
        return self
@reversion.register()
class Source(models.Model):
    """Holds information about entities and their relations."""

    orig_filename = models.CharField(max_length=255, blank=True)
    indexed = models.BooleanField(default=False)
    pubinfo = models.CharField(max_length=400, blank=True)
    author = models.CharField(max_length=255, blank=True)
    orig_id = models.PositiveIntegerField(blank=True, null=True)

    def __str__(self):
        """Return ``"<filename>, stored by <author>"`` when both are set.

        Falls back to an ``(ID: n)`` placeholder if either the author or
        the original filename is empty.
        """
        if self.author != "" and self.orig_filename != "":
            # The second placeholder is the author, not the filename again.
            return "{}, stored by {}".format(
                self.orig_filename, self.author)
        else:
            return "(ID: {})".format(self.id)
@reversion.register()
class Collection(models.Model):
    """A named group of entities and relations."""

    name = models.CharField(max_length=255)
    description = models.TextField(blank=True)
    collection_type = models.ForeignKey(
        CollectionType,
        blank=True,
        null=True,
    )
    groups_allowed = models.ManyToManyField(Group)
    #parent_class = models.ForeignKey('self', blank=True, null=True)

    def __str__(self):
        """Return the collection's name."""
        return self.name
@reversion.register()
class Text(models.Model):
    """Holds unstructured text associated with one or many
    entities/relations.
    """

    kind = models.ForeignKey(TextType, blank=True, null=True)
    text = models.TextField(blank=True)
    source = models.ForeignKey(Source, blank=True, null=True)

    def __str__(self):
        """Return the text, or an ``(ID: n)`` placeholder if it is empty."""
        if self.text != "":
            return self.text
        else:
            return "(ID: {})".format(self.id)

    def save(self, *args, **kwargs):
        """Save the text and keep its annotations aligned with the new text.

        When the text of an already stored object changes, every
        annotation whose span lies completely inside a block that is
        unchanged between the old and the new text is shifted by that
        block's offset. Annotations not fully contained in any unchanged
        block are deleted.
        """
        if self.pk is not None:
            try:
                orig = Text.objects.get(pk=self.pk)
            except Text.DoesNotExist:
                # A pk was assigned before the first save: there is no old
                # text and therefore nothing to realign.
                orig = None
            if orig is not None and orig.text != self.text:
                ann = Annotation.objects.filter(
                    text_id=self.pk).order_by('start')
                seq = SequenceMatcher(None, orig.text, self.text)
                blocks = seq.get_matching_blocks()
                for a in ann:
                    for s in blocks:
                        if s.a <= a.start and (s.a + s.size) >= a.end:
                            offset = s.b - s.a
                            a.start += offset
                            a.end += offset
                            a.save()
                            # Stop here: later blocks must not be compared
                            # against the already shifted offsets, which
                            # could shift the annotation a second time.
                            break
                    else:
                        a.delete()  # TODO: we might want to delete relations as well.
        super().save(*args, **kwargs)
@reversion.register()
class Uri(models.Model):
    """A URI attached to an entity, with bookkeeping about RDF loading."""

    uri = models.URLField(blank=True, null=True, unique=True)
    domain = models.CharField(max_length=255, blank=True)
    rdf_link = models.URLField(blank=True)
    entity = models.ForeignKey(TempEntityClass, blank=True, null=True)
    # loaded set to True when RDF was loaded and parsed into the data model
    loaded = models.BooleanField(default=False)
    # Timestamp when file was loaded and parsed
    loaded_time = models.DateTimeField(blank=True, null=True)

    def __str__(self):
        """Return the URI, or an ``(ID: n)`` placeholder if none is set.

        ``uri`` is nullable; returning ``None`` from ``__str__`` would
        raise a ``TypeError``.
        """
        if self.uri:
            return self.uri
        return "(ID: {})".format(self.id)

    def get_web_object(self):
        """Return a dict describing this URI.

        :return: dict with the keys ``relation_pk``, ``relation_type``
            (always ``'uri'``), ``related_entity`` (name of the linked
            entity, or ``None`` when no entity is linked) and ``uri``.
        """
        # ``entity`` is nullable, so guard the attribute access.
        entity_name = self.entity.name if self.entity is not None else None
        result = {
            'relation_pk': self.pk,
            'relation_type': 'uri',
            'related_entity': entity_name,
            'uri': self.uri,
        }
        return result
@reversion.register()
class UriCandidate(models.Model):
    """Stores URI candidates for automatically generated entities."""

    uri = models.URLField()
    confidence = models.FloatField(blank=True, null=True)
    responsible = models.CharField(max_length=255)
    entity = models.ForeignKey(
        TempEntityClass,
        blank=True,
        null=True,
    )
@receiver(post_save, sender=Uri, dispatch_uid="remove_default_uri")
def remove_default_uri(sender, instance, **kwargs):
    """Drop 'apis default' URIs once the entity has more than one URI.

    Runs after every ``Uri`` save. If the saved URI's entity has more
    than one ``Uri`` row, all of that entity's rows whose domain is
    ``'apis default'`` are deleted.
    """
    uris_of_entity = Uri.objects.filter(entity=instance.entity)
    if uris_of_entity.count() > 1:
        uris_of_entity.filter(domain='apis default').delete()