#!/usr/bin/python3

# Copyright (c) 2018 Hong Xu <hongx@usc.edu>.

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

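# This script generates the dependency network of Fedora packages. It writes two files to the
# current directory: fedora.desc (the ID, name and description of every package) and fedora.edges
# (one "<depending package ID> <depended-on package ID>" pair per line). Simply run this file on a
# Fedora system (>= version 22).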

# If you use this script in your published work, please cite "Shudan Zhong and Hong Xu. 2018.
# Learning Embeddings of Directed Networks with Text-Associated Nodes with Application in Software
# Package Dependency Networks. arXiv:1809.02270."

from collections import defaultdict
import itertools

import dnf


# Set up a DNF context that keeps its metadata cache in the current working directory.
base = dnf.Base()
base.conf.cachedir = '.'
base.read_all_repos()

# Drop every configured repository except the one whose ID is 'fedora'. The IDs to remove are
# collected first so that the repository mapping is not modified while it is being iterated.
unwanted_repo_ids = [repo_id for repo_id in base.repos if repo_id != 'fedora']
for repo_id in unwanted_repo_ids:
    del base.repos[repo_id]

# Refresh the repository metadata and load it into the sack, leaving out the system repo
# (load_system_repo=False).
base.update_cache()
base.fill_sack(load_system_repo=False)

# NOTE(review): latest=True presumably limits the result to the newest build of each package
# (per name and architecture) -- confirm against the DNF API documentation.
available_pkgs = base.sack.query()
q = available_pkgs.filter(latest=True)

# Maps a package name to the numeric ID assigned to it in the loop below.
# NOTE(review): if the query yields several packages with the same name (e.g. one per
# architecture -- confirm), each of them gets its own ID and description entry, and this dict
# only remembers the last ID seen for that name.
pkg_to_id = dict()

# String form of a file path / capability -> IDs of the packages providing (resp. requiring) it.
item_provided_by = defaultdict(list)
item_required_by = defaultdict(list)

# The encoding is pinned to UTF-8 instead of being left to the locale default: package
# descriptions may contain non-ASCII text, which would raise UnicodeEncodeError under e.g. a
# C/POSIX locale. Plain 'w' is enough because the file is never read back here.
with open('fedora.desc', 'w', encoding='utf-8') as desc_f:
    print("Writing descriptions...")
    # Package IDs are 1-based and follow the iteration order of the query.
    for i, pkg in enumerate(q, 1):
        if i % 100 == 0:
            print('processed {}'.format(i))
        pkg_to_id[pkg.name] = i
        # Each entry is a "[PACKAGE] <id> <name>" header line, the description, and a blank line.
        print('[PACKAGE] {} {}'.format(i, pkg.name), file=desc_f)
        print(pkg.description, end='\n\n', file=desc_f)

        # Files shipped by the package and capabilities it declares are indexed together, since
        # the edge generation step matches requirements against either of them.
        for item in itertools.chain(pkg.files, pkg.provides):
            item_provided_by[str(item)].append(i)
        # NOTE(review): requirements are keyed by their full string form; a requirement that
        # carries a version constraint presumably never equals the string form of a provide --
        # confirm whether such dependencies are meant to be dropped.
        for item in pkg.requires:
            item_required_by[str(item)].append(i)

print("Generating edges...")
# An edge (a, b) means: package a requires an item that package b provides. A set is used because
# the same pair of packages can be linked through several items.
edges = set()
for item, pkgs_depending in item_required_by.items():
    # .get() is used instead of indexing: indexing a defaultdict would insert a new empty list for
    # every requirement that no package provides, needlessly growing item_provided_by.
    for pkg_depended in item_provided_by.get(item, ()):
        for pkg_depending in pkgs_depending:
            edges.add((pkg_depending, pkg_depended))

# The edges are written in sorted order so that the output file is reproducible instead of
# following the iteration order of the set. Plain 'w' is enough because the file is never read
# back here.
with open('fedora.edges', 'w') as edge_f:
    for pkg_depending, pkg_depended in sorted(edges):
        print('{} {}'.format(pkg_depending, pkg_depended), file=edge_f)
