# The MIT License (MIT) # Copyright (c) 2014-2017 University of Bristol
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
from time_interval import TimeIntervals, parse_time_tuple, RelativeTimeInterval, TimeInterval
from utils import Hashable, TypedBiDict, Printable, utcnow
from models import StreamStatusModel, TimeIntervalModel, StreamDefinitionModel
import logging
# from copy import deepcopy
from collections import Iterable, namedtuple, deque
from datetime import datetime
from mongoengine.context_managers import switch_db
[docs]class StreamView(Printable):
"""
Simple helper class for storing streams with a time interval (i.e. a "view" on a stream)
"""
def __init__(self, stream, time_interval):
if not isinstance(stream, Stream):
raise ValueError("stream must be Stream object")
if not isinstance(time_interval, TimeInterval):
raise ValueError("relative_time_interval must be TimeInterval object")
self.stream = stream
self.time_interval = time_interval
def __iter__(self):
for item in self.stream.channel.get_results(self.stream, self.time_interval):
yield item
[docs] def items(self):
"""
Return all results as a list
:return:
"""
return list(self.iteritems())
[docs] def iteritems(self):
return iter(self)
[docs] def timestamps(self):
return list(self.itertimestamps())
[docs] def itertimestamps(self):
return map(lambda x: x.timestamp, self.iteritems())
[docs] def values(self):
return list(self.itervalues())
[docs] def itervalues(self):
return map(lambda x: x.value, self.iteritems())
[docs] def last(self, default=None):
item = default
for item in self:
pass
return item
[docs] def first(self, default=None):
for el in self:
return el
return default
[docs] def head(self, n):
i = 0
for d in self.iteritems():
i += 1
if i > n:
break
yield d
[docs] def tail(self, n):
return iter(deque(self, maxlen=n))
[docs] def component(self, key):
# TODO: is this needed now we have a Component() tool?
for (time, data) in self.iteritems():
if key in data:
yield StreamInstance(time, data[key])
[docs] def component_filter(self, key, values):
# TODO: is this needed now we have a ComponentFilter() tool?
for (time, data) in self.iteritems():
if key in data and data[key] in values:
yield StreamInstance(time, data)
[docs] def delete_nones(self):
# TODO: Test this against ComponentFilter(key, values=[None], complement=true)
for (time, data) in self.iteritems():
data2 = {}
for (key, value) in data.items():
if value is not None:
data2[key] = value
yield StreamInstance(time, data2)
[docs]class StreamInstance(namedtuple("StreamInstance", "timestamp value")):
"""
Simple helper class for storing data instances that's a bit neater than simple tuples
"""
def __new__(cls, timestamp, value):
if not isinstance(timestamp, datetime):
raise ValueError("Timestamp must be datetime.datetime")
return super(StreamInstance, cls).__new__(cls, timestamp, value)
[docs]class StreamDict(TypedBiDict):
"""
Custom bi-directional dictionary where keys are StreamID objects and values are Stream objects.
Raises ValueDuplicationError if the same Stream is added again
"""
def __init__(self, *args, **kwargs):
super(StreamDict, self).__init__(StreamId, Stream, *args, **kwargs)
[docs]class StreamId(Hashable):
"""
Helper class for stream identifiers. A stream identifier contains the stream name and any meta-data
"""
def __init__(self, name, meta_data=None):
self.name = name
if meta_data:
if isinstance(meta_data, dict):
self.meta_data = tuple(sorted(meta_data.items()))
elif isinstance(meta_data, tuple):
self.meta_data = meta_data
else:
raise ValueError("Expected dict or tuple, got {}".format(type(meta_data)))
else:
self.meta_data = tuple()
# self.meta_data = meta_data if meta_data else {}
def __str__(self):
if self.meta_data:
# return self.name + ": [" + ", ".join("{}={}".format(k, v) for k, v in self.meta_data.items()) + "]"
return self.name + ": [" + ", ".join("{}={}".format(k, v) for k, v in self.meta_data) + "]"
else:
return self.name
def __repr__(self):
return "{}(name={}, meta_data={})".format(
self.__class__.__name__,
repr(self.name),
repr(self.meta_data)
)
def __eq__(self, other):
return isinstance(other, StreamId) and \
self.name == other.name and \
sorted(self.meta_data) == sorted(other.meta_data)
[docs] def as_dict(self):
return dict(name=self.name, meta_data=dict(self.meta_data) if self.meta_data else {})
[docs] def as_raw(self):
"""
Return a representation of this object that can be used with mongoengine Document.objects(__raw__=x)
Example:
>>> stream_id = StreamId(name='test', meta_data=((u'house', u'1'), (u'resident', u'1')))
>>> stream_id.as_raw()
{u'stream_id.meta_data.house': u'1', u'stream_id.meta_data.resident': u'1', 'stream_id.name': 'test'}
:return: The raw representation of this object.
"""
d = dict(('stream_id.meta_data.' + k, v) for k, v in self.meta_data)
d['stream_id.name'] = self.name
return d
[docs]class Stream(Hashable):
"""
Stream reference class
"""
_calculated_intervals = None
defined = False
def __init__(self, channel, stream_id, calculated_intervals, sandbox):
"""
:param channel: The channel to which this stream belongs
:param stream_id: The unique identifier for this string
:param calculated_intervals: The time intervals in which this has been calculated
:param sandbox: The sandbox in which this stream lives
:type channel: BaseChannel
:type stream_id: StreamId
:type calculated_intervals: TimeIntervals, None
:type sandbox: str, unicode, None
"""
self.channel = channel
if not isinstance(stream_id, StreamId):
raise TypeError(str(type(stream_id)))
self.stream_id = stream_id
# self.get_results_func = get_results_func
if calculated_intervals:
if not isinstance(calculated_intervals, TimeIntervals):
raise TypeError(str(type(calculated_intervals)))
self.calculated_intervals = calculated_intervals
else:
self.calculated_intervals = TimeIntervals()
self.tool_reference = None # needed to traverse the graph outside of workflows
self.sandbox = sandbox
# Here we define the output type. When modifiers are applied, this changes
# self.output_format = 'doc_gen'
def __str__(self):
return "{}(stream_id={}, channel_id={})".format(
self.__class__.__name__,
self.stream_id,
self.channel.channel_id)
def __repr__(self):
return str(self)
def __eq__(self, other):
return str(self) == str(other)
@property
def calculated_intervals(self):
# TODO: this should be read from the stream_status collection if it's in the database channel
return self._calculated_intervals
@calculated_intervals.setter
def calculated_intervals(self, value):
# TODO: this should be written to the stream_status collection if it's in the database channel
self._calculated_intervals = value
@property
def writer(self):
return self.channel.get_stream_writer(self)
[docs] def window(self, time_interval):
"""
Gets a view on this stream for the time interval given
:param time_interval: either a TimeInterval object or (start, end) tuple of type str or datetime
:type time_interval: Iterable, TimeInterval
:return: a stream view object
"""
if isinstance(time_interval, TimeInterval):
pass
elif isinstance(time_interval, Iterable):
time_interval = parse_time_tuple(*time_interval)
if isinstance(time_interval, RelativeTimeInterval):
raise NotImplementedError
elif isinstance(time_interval, RelativeTimeInterval):
raise NotImplementedError
else:
raise TypeError("Expected TimeInterval or (start, end) tuple of type str or datetime, got {}"
.format(type(time_interval)))
return StreamView(stream=self, time_interval=time_interval)
[docs]class DatabaseStream(Stream):
"""
Simple subclass that overrides the calculated intervals property
"""
def __init__(self, channel, stream_id, calculated_intervals, sandbox):
super(DatabaseStream, self).__init__(
channel=channel, stream_id=stream_id, calculated_intervals=calculated_intervals, sandbox=sandbox)
[docs] def save_definition(self):
"""
Saves the stream definition to the database. This assumes that the definition doesn't already exist, and will
raise an exception if it does.
:return: None
"""
with switch_db(StreamDefinitionModel, 'hyperstream'):
stream_definition = StreamDefinitionModel(
stream_id=self.stream_id.as_dict(),
channel_id=self.channel.channel_id,
sandbox=self.sandbox)
stream_definition.save()
@property
def calculated_intervals(self):
with switch_db(StreamStatusModel, 'hyperstream'):
status = StreamStatusModel.objects.get(__raw__=self.stream_id.as_raw())
calculated_intervals = TimeIntervals(map(lambda x: TimeInterval(x.start, x.end), status.calculated_intervals))
return calculated_intervals
@calculated_intervals.setter
def calculated_intervals(self, intervals):
"""
Updates the calculated intervals in the database. Performs an upsert
:param intervals: The calculated intervals
:return: None
"""
with switch_db(StreamStatusModel, 'hyperstream'):
StreamStatusModel.objects(__raw__=self.stream_id.as_raw()).modify(
upsert=True,
set__stream_id=self.stream_id.as_dict(),
set__last_updated=utcnow(),
set__calculated_intervals=tuple(map(lambda x: TimeIntervalModel(start=x.start, end=x.end), intervals))
)