In [1]:
# import packages
import random
import warnings
warnings.filterwarnings('ignore')
import functools
import itertools
from typing import List, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rdflib
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC

import sys
sys.path.append('pyRDF2Vec')
sys.path.append('pyRDF2Vec/pyrdf2vec')

from pyrdf2vec import RDF2VecTransformer
from pyrdf2vec.graphs import KG
from pyrdf2vec.embedders import Word2Vec
from pyrdf2vec.walkers import (
    AnonymousWalker,
    CommunityWalker,
    HalkWalker,
    NGramWalker,
    RandomWalker,
    WalkletWalker,
    WeisfeilerLehmanWalker,
)
from pyrdf2vec.samplers import (
    ObjPredFreqSampler,
    PredFreqSampler,
    UniformSampler,
    ObjFreqSampler,
    PageRankSampler,
)

import pickle

In [2]:
# memdefinisikan entities yang ada menggunakan csv
data = pd.read_csv('Node2Vec 100')
entities = data['uri']

label_predicates = [
     'http://www.w3.org/1999/02/22-rdf-syntax-ns/',
     'http://www.w3.org/2000/01/rdf-schema/',
     'http://www.w3.org/2002/07/owl/',
     'http://www.w3.org/2001/XMLSchema/',
     'http://xmlns.com/foaf/0.1/',
     'http://halal.addi.is.its.ac.id/halalv.ttl/',
     'http://halal.addi.is.its.ac.id/foodproducts/',
     'http://halal.addi.is.its.ac.id/ingredients/',
     'http://halal.addi.is.its.ac.id/certificates/',
     'http://halal.addi.is.its.ac.id/sources/',
     'http://halal.addi.is.its.ac.id/manufactures/'
     'http://purl.org/foodontology/',
     'http://data.lirmm.fr/ontologies/food/',
     'http://purl.org/goodrelations/v1/',
     'http://dbpedia.org/resource/',
]

In [3]:
entities

0        http://halal.addi.is.its.ac.id/manufactures/Je...
1        http://halal.addi.is.its.ac.id/manufactures/Am...
2        http://halal.addi.is.its.ac.id/manufactures/St...
3        http://halal.addi.is.its.ac.id/manufactures/Li...
4        http://halal.addi.is.its.ac.id/manufactures/Yi...
                               ...                        
21234    http://halal.addi.is.its.ac.id/manufactures/Pf...
21235    http://halal.addi.is.its.ac.id/manufactures/Te...
21236    http://halal.addi.is.its.ac.id/manufactures/By...
21237    http://halal.addi.is.its.ac.id/manufactures/My...
21238    http://halal.addi.is.its.ac.id/manufactures/Mr...
Name: uri, Length: 21239, dtype: object

In [4]:
# mendefinisikan knowledge graph menggunakan file n-triple
from pyrdf2vec.graphs import KG
kg = KG('rdf.nt', 
        label_predicates=[rdflib.URIRef(x) for x in label_predicates])

In [5]:
# mencari entities yang ada di dalam knowledge graph
filtered_entities = [e for e in entities if e in kg._entities]
filtered_entities

['http://halal.addi.is.its.ac.id/manufactures/Jeunesse_Llc',
 'http://halal.addi.is.its.ac.id/manufactures/American_Halal_Co_Inc',
 'http://halal.addi.is.its.ac.id/manufactures/Stonyfield',
 'http://halal.addi.is.its.ac.id/manufactures/Lian_Hoe_Tea_Coffee_M_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/Yi_Quan_Trading',
 'http://halal.addi.is.its.ac.id/manufactures/Solgar_Inc',
 'http://halal.addi.is.its.ac.id/manufactures/Hong_Kong_Roast_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/G_G_Century_M_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/Uno_Nutrition_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/Mai_Wai_Foods_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/Gamat_Emas_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/Salim_Mk_Frozen_Sdn_Bhd',
 'http://halal.addi.is.its.ac.id/manufactures/Sun_Maid_Growers_Of_California',
 'http://halal.addi.is.its.ac.id/manufactures/Punjab_Milk_Foods',
 'http://halal.addi.is.its.ac.id/manufactur

In [6]:
not_found = set(entities) - set(filtered_entities)
print(f'{not_found} could not be found in the KG! Removing them...')
entities = filtered_entities

{nan} could not be found in the KG! Removing them...


In [7]:
# menjalankan transformer untuk menghasilkan embedding
transformer = RDF2VecTransformer(walkers=[RandomWalker(2, 5)])
walk_embeddings = transformer.fit(kg, entities).transform(entities)

In [8]:
print(walk_embeddings)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [9]:
# export ke bentuk csv
np.savetxt("rdf2vecbarusidang.csv", walk_embeddings, delimiter=",")

In [46]:
embedding_df2 = pd.read_csv('rdf2vecbaru.csv', delimiter = ',') 
embedding_df2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
0,-0.009982,-0.010785,0.001290,-0.002210,0.012950,-0.014055,-0.009258,0.002966,-0.011494,-0.014278,...,0.002688,0.001218,0.002677,-0.004382,0.008542,0.016461,-0.004507,-0.009144,-0.017365,-0.003578
1,0.001167,-0.009434,0.000645,-0.001031,0.008766,-0.000232,-0.008129,0.005552,-0.006562,-0.005205,...,0.003313,0.004367,0.006992,-0.005176,-0.003665,0.001302,-0.007643,-0.004331,-0.007680,0.000269
2,-0.002369,-0.013097,0.002923,-0.011869,0.007506,-0.008881,-0.008147,0.002153,-0.007135,-0.005334,...,0.009371,0.006710,0.008013,-0.005424,0.010427,0.012458,-0.009169,-0.003693,-0.007440,-0.002895
3,-0.001649,-0.005344,-0.003339,-0.005358,-0.001656,0.002563,-0.005313,-0.002229,0.001380,-0.000410,...,-0.000795,-0.000445,0.003015,0.000822,-0.003762,0.007071,-0.006090,0.000607,0.000325,0.000925
4,0.002971,-0.004289,-0.001187,-0.002908,0.000447,-0.001315,-0.005677,-0.002215,-0.003805,0.002946,...,0.005115,0.004029,-0.003079,-0.003695,-0.002872,0.003256,-0.002444,-0.001074,-0.000400,-0.003808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21232,-0.015791,-0.016935,0.000231,-0.002209,0.015589,-0.019267,-0.013298,0.003206,-0.012608,-0.007896,...,0.008705,0.009454,0.003483,-0.006225,0.009099,0.015249,-0.013845,-0.000098,-0.022265,0.001284
21233,0.004632,-0.003782,0.004087,-0.002548,0.013088,-0.005177,-0.008472,-0.003216,-0.009668,0.003497,...,0.012949,0.012496,-0.000363,-0.004499,0.002768,0.005954,-0.015915,-0.008941,-0.008630,-0.004899
21234,0.004317,-0.004459,0.001513,-0.001362,0.008050,-0.010236,-0.001776,-0.002006,-0.006190,0.000638,...,0.009008,0.004254,-0.003833,-0.006258,-0.000842,0.011535,-0.012161,-0.005507,-0.001593,-0.000905
21235,-0.003378,-0.014259,0.000830,-0.001121,0.007841,-0.007457,-0.013431,0.004210,-0.013931,-0.000238,...,0.008928,-0.000022,-0.002112,-0.002604,0.004747,0.009997,-0.007762,-0.016962,-0.007368,-0.003753


In [56]:
df = pd.DataFrame(entities)
df = df.rename(columns={0 : 'uri'})
df

Unnamed: 0,uri
0,http://halal.addi.is.its.ac.id/manufactures/Je...
1,http://halal.addi.is.its.ac.id/manufactures/Am...
2,http://halal.addi.is.its.ac.id/manufactures/St...
3,http://halal.addi.is.its.ac.id/manufactures/Li...
4,http://halal.addi.is.its.ac.id/manufactures/Yi...
...,...
21232,http://halal.addi.is.its.ac.id/manufactures/Pf...
21233,http://halal.addi.is.its.ac.id/manufactures/Te...
21234,http://halal.addi.is.its.ac.id/manufactures/By...
21235,http://halal.addi.is.its.ac.id/manufactures/My...


In [57]:
embedding_df2.to_csv('embed rapih.csv', index='true')
df.to_csv('uri rapih.csv', index='true')

In [58]:
gabung1 = pd.read_csv('embed rapih.csv') 
gabung1 = gabung1.rename(columns={'Unnamed: 0': 'index'})
gabung1

Unnamed: 0,index,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,0,-0.009982,-0.010785,0.001290,-0.002210,0.012950,-0.014055,-0.009258,0.002966,-0.011494,...,0.002688,0.001218,0.002677,-0.004382,0.008542,0.016461,-0.004507,-0.009144,-0.017365,-0.003578
1,1,0.001167,-0.009434,0.000645,-0.001031,0.008766,-0.000232,-0.008129,0.005552,-0.006562,...,0.003313,0.004367,0.006992,-0.005176,-0.003665,0.001302,-0.007643,-0.004331,-0.007680,0.000269
2,2,-0.002369,-0.013097,0.002923,-0.011869,0.007506,-0.008881,-0.008147,0.002153,-0.007135,...,0.009371,0.006710,0.008013,-0.005424,0.010427,0.012458,-0.009169,-0.003693,-0.007440,-0.002895
3,3,-0.001649,-0.005344,-0.003339,-0.005358,-0.001656,0.002563,-0.005313,-0.002229,0.001380,...,-0.000795,-0.000445,0.003015,0.000822,-0.003762,0.007071,-0.006090,0.000607,0.000325,0.000925
4,4,0.002971,-0.004289,-0.001187,-0.002908,0.000447,-0.001315,-0.005677,-0.002215,-0.003805,...,0.005115,0.004029,-0.003079,-0.003695,-0.002872,0.003256,-0.002444,-0.001074,-0.000400,-0.003808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21232,21232,-0.015791,-0.016935,0.000231,-0.002209,0.015589,-0.019267,-0.013298,0.003206,-0.012608,...,0.008705,0.009454,0.003483,-0.006225,0.009099,0.015249,-0.013845,-0.000098,-0.022265,0.001284
21233,21233,0.004632,-0.003782,0.004087,-0.002548,0.013088,-0.005177,-0.008472,-0.003216,-0.009668,...,0.012949,0.012496,-0.000363,-0.004499,0.002768,0.005954,-0.015915,-0.008941,-0.008630,-0.004899
21234,21234,0.004317,-0.004459,0.001513,-0.001362,0.008050,-0.010236,-0.001776,-0.002006,-0.006190,...,0.009008,0.004254,-0.003833,-0.006258,-0.000842,0.011535,-0.012161,-0.005507,-0.001593,-0.000905
21235,21235,-0.003378,-0.014259,0.000830,-0.001121,0.007841,-0.007457,-0.013431,0.004210,-0.013931,...,0.008928,-0.000022,-0.002112,-0.002604,0.004747,0.009997,-0.007762,-0.016962,-0.007368,-0.003753


In [59]:
gabung2 = pd.read_csv('uri rapih.csv') 
gabung2 = gabung2.rename(columns={'Unnamed: 0': 'index'})
gabung2

Unnamed: 0,index,uri
0,0,http://halal.addi.is.its.ac.id/manufactures/Je...
1,1,http://halal.addi.is.its.ac.id/manufactures/Am...
2,2,http://halal.addi.is.its.ac.id/manufactures/St...
3,3,http://halal.addi.is.its.ac.id/manufactures/Li...
4,4,http://halal.addi.is.its.ac.id/manufactures/Yi...
...,...,...
21232,21232,http://halal.addi.is.its.ac.id/manufactures/Pf...
21233,21233,http://halal.addi.is.its.ac.id/manufactures/Te...
21234,21234,http://halal.addi.is.its.ac.id/manufactures/By...
21235,21235,http://halal.addi.is.its.ac.id/manufactures/My...


In [60]:
gabungan = gabung2.merge(gabung1, on = 'index')
gabungan = gabungan.drop(columns='index')
gabungan

Unnamed: 0,uri,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,http://halal.addi.is.its.ac.id/manufactures/Je...,-0.009982,-0.010785,0.001290,-0.002210,0.012950,-0.014055,-0.009258,0.002966,-0.011494,...,0.002688,0.001218,0.002677,-0.004382,0.008542,0.016461,-0.004507,-0.009144,-0.017365,-0.003578
1,http://halal.addi.is.its.ac.id/manufactures/Am...,0.001167,-0.009434,0.000645,-0.001031,0.008766,-0.000232,-0.008129,0.005552,-0.006562,...,0.003313,0.004367,0.006992,-0.005176,-0.003665,0.001302,-0.007643,-0.004331,-0.007680,0.000269
2,http://halal.addi.is.its.ac.id/manufactures/St...,-0.002369,-0.013097,0.002923,-0.011869,0.007506,-0.008881,-0.008147,0.002153,-0.007135,...,0.009371,0.006710,0.008013,-0.005424,0.010427,0.012458,-0.009169,-0.003693,-0.007440,-0.002895
3,http://halal.addi.is.its.ac.id/manufactures/Li...,-0.001649,-0.005344,-0.003339,-0.005358,-0.001656,0.002563,-0.005313,-0.002229,0.001380,...,-0.000795,-0.000445,0.003015,0.000822,-0.003762,0.007071,-0.006090,0.000607,0.000325,0.000925
4,http://halal.addi.is.its.ac.id/manufactures/Yi...,0.002971,-0.004289,-0.001187,-0.002908,0.000447,-0.001315,-0.005677,-0.002215,-0.003805,...,0.005115,0.004029,-0.003079,-0.003695,-0.002872,0.003256,-0.002444,-0.001074,-0.000400,-0.003808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21232,http://halal.addi.is.its.ac.id/manufactures/Pf...,-0.015791,-0.016935,0.000231,-0.002209,0.015589,-0.019267,-0.013298,0.003206,-0.012608,...,0.008705,0.009454,0.003483,-0.006225,0.009099,0.015249,-0.013845,-0.000098,-0.022265,0.001284
21233,http://halal.addi.is.its.ac.id/manufactures/Te...,0.004632,-0.003782,0.004087,-0.002548,0.013088,-0.005177,-0.008472,-0.003216,-0.009668,...,0.012949,0.012496,-0.000363,-0.004499,0.002768,0.005954,-0.015915,-0.008941,-0.008630,-0.004899
21234,http://halal.addi.is.its.ac.id/manufactures/By...,0.004317,-0.004459,0.001513,-0.001362,0.008050,-0.010236,-0.001776,-0.002006,-0.006190,...,0.009008,0.004254,-0.003833,-0.006258,-0.000842,0.011535,-0.012161,-0.005507,-0.001593,-0.000905
21235,http://halal.addi.is.its.ac.id/manufactures/My...,-0.003378,-0.014259,0.000830,-0.001121,0.007841,-0.007457,-0.013431,0.004210,-0.013931,...,0.008928,-0.000022,-0.002112,-0.002604,0.004747,0.009997,-0.007762,-0.016962,-0.007368,-0.003753


In [61]:
gabungan.to_csv('RDF2Vec ready.csv')