ALIGNED project: Comparative Monte Carlo¶
Aligning Life Cycle Assessment methods and bio-based sectors for improved environmental performance
Horizon Europe grant agreement N° 101059430. Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Executive Agency.
WP1 Shared modelling framework and learnings¶
Task 1.4 Framework for interpreting uncertainty¶
Deliverable 1.2 Description of scientific methods¶
Tutorial for performing a comparative Monte Carlo uncertainty analysis¶
Massimo Pizzol, Aalborg University (AAU), 2024¶
This notebook shows how to perform an uncertainty analysis on an example product system of a biobased product.
In particular, a comparative Monte Carlo simulation is performed here. This is the type of simulation to be used in comparative LCAs, i.e. in analyses where different alternatives for providing the same Functional Unit are compared. In the comparative case it is important to randomly sample a common technology matrix A for all alternatives at each iteration, instead of sampling a different technology matrix for each alternative at each iteration. This reduces the variance of the results and shortens computation time. Moreover, it makes it possible not only to quantify the uncertainty of each alternative, but also to test the results statistically with paired tests, to see whether the two alternatives are significantly different from each other or not.
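Before moving to the actual models, the following self-contained sketch (plain NumPy, with made-up numbers) illustrates the paired-sampling idea: one common "background" sample is drawn per iteration and used to score both alternatives, so the paired difference reflects only what actually differs between them. The Brightway2 implementation used in this notebook follows further below.
# Illustrative sketch of paired (dependent) sampling; NumPy only, made-up numbers
import numpy as np

rng = np.random.default_rng(42)
iterations = 1000
scores_a, scores_b = [], []

for _ in range(iterations):
    background = rng.lognormal(mean=0.0, sigma=0.2)  # one common background sample per iteration
    a = 100 * rng.uniform(0.8, 1.2) * background     # alternative A, with its own foreground uncertainty
    b = 110 * rng.uniform(0.8, 1.2) * background     # alternative B, scored with the SAME background sample
    scores_a.append(a)
    scores_b.append(b)

differences = np.array(scores_b) - np.array(scores_a)
print((differences > 0).mean())  # share of iterations where B has a higher impact than A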
A recommended supporting reading is Henriksson et al. (2015), probably the best example of an LCA with a comparative Monte Carlo simulation followed by statistical testing for significant differences between alternatives. An application of this approach is also provided in Pizzol (2019).
Henriksson, P. J. G., Rico, A., Zhang, W., Ahmad-Al-Nahid, S., Newton, R., Phan, L. T., … Guinée, J. B. (2015). Comparison of Asian Aquaculture Products by Use of Statistically Supported Life Cycle Assessment. Environmental Science and Technology, 49(24), 14176-14183. https://doi.org/10.1021/acs.est.5b04634
Pizzol, M., 2019. Deterministic and stochastic carbon footprint of intermodal ferry and truck freight transport across Scandinavian routes. Journal of Cleaner Production 224, 626–636. https://doi.org/10.1016/j.jclepro.2019.03.270
# import packages
import brightway2 as bw
import pandas as pd
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
from lci_to_bw2 import * # import all the functions of this module
# open a project with ecoinvent v.3.9.1 consequential system model
bw.projects.set_current('advlca23')
We start by importing data about a fictional ("dummy") product system for a biobased product.
# Import the dummy product system
# import data from csv
mydata = pd.read_csv('ALIGNED-LCI-biobased-product-dummy.csv', header = 0, sep = ",") # using csv file avoids encoding problem
mydata.head()
# work on a copy of the imported table
# (all columns, including the exchange uncertainty fields, are kept for the Brightway2 import)
mydb = mydata.copy()
mydb['Exchange uncertainty type'] = mydb['Exchange uncertainty type'].fillna(0).astype(int) # uncertainty as integers
# Note: to avoid having both nan and values in the uncertainty column I use zero as default
# Create dictionary in bw format and write database to disk.
# Shut down all other notebooks using the same project before doing this
bw2_db = lci_to_bw2(mydb) # a function from the lci_to_bw2 module
# write database
bw.Database('ALIGNED-biob-prod-dummy').write(bw2_db)
The product system includes different activities such as the production, use, and end of life of the biobased product.
# check what foreground activities are included
for act in bw.Database('ALIGNED-biob-prod-dummy'):
    print(act, act['code'])
'Biobased-product-use' (year, None, None) f9eabf64-b899-40c0-9f9f-2009dbb0a0b2
'Biomass-growth' (kilogram, None, None) a7d34649-9c10-4423-bac3-ecab9b43b20c
'Biomass-processing' (kilogram, None, None) 403a5c32-c769-46fc-8b9a-74b8eb3c79d1
'Biobased-product-manufacturing' (kilogram, None, None) a37d149a-6508-4563-8af6-e5a39b4176df
'Biobased-product-eol' (kilogram, None, None) c8301e73-d521-4a89-998b-30b7e7751011
We calculate a static climate impact score for the fictional biobased product, to be used for reference later on.
# calculation of static LCA score
mymethod = ('IPCC 2013', 'climate change', 'global warming potential (GWP100)')
use = bw.Database('ALIGNED-biob-prod-dummy').get('f9eabf64-b899-40c0-9f9f-2009dbb0a0b2') # Biobased-product-use
functional_unit = {use: 1}
LCA = bw.LCA(functional_unit, mymethod)
LCA.lci()
LCA.lcia()
print(LCA.score)
121.67148851736351
A simple stochastic simulation¶
We perform a simulation and calculate 100 results for the same functional unit using stochastic error propagation starting from the uncertainty in the foreground and background input data.
mc = bw.MonteCarloLCA(functional_unit, mymethod)
mc_results = [next(mc) for x in range(100)] # will take a minute, we are randomly sampling ecoinvent 100 times
We then calculate descriptive statistics for the resulting distribution.
# plot MC results
plt.hist(mc_results, density=True)
plt.ylabel("Probability")
plt.xlabel(bw.methods[mymethod]["unit"])
pd.DataFrame(mc_results).describe()
| | 0 |
|---|---|
| count | 100.000000 |
| mean | 154.712661 |
| std | 53.100758 |
| min | 49.820552 |
| 25% | 119.672906 |
| 50% | 145.139356 |
| 75% | 177.796679 |
| max | 371.219754 |
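As an optional complement to the summary table, the spread of the sampled scores can also be reported as an empirical 95% interval and as a coefficient of variation; a small sketch using the mc_results list computed above:
# empirical 95% interval and relative spread of the 100 Monte Carlo scores
print(np.percentile(mc_results, [2.5, 97.5]))    # central 95% of the sampled scores
print(np.std(mc_results) / np.mean(mc_results))  # coefficient of variation (std / mean)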
A comparative stochastic simulation¶
Now we perform a stochastic simulation where we compare the initial product with a fictional alternative. Scores are calculated in a paired fashion: first (i) a new technology matrix is sampled, then (ii) results are calculated for each product using this matrix, and only then is the procedure iterated. In this way we ensure that the comparison is made under the same conditions at each iteration.
We first need to define the alternative product: a product for which a larger amount of product items is needed in the use stage. The uncertainty estimates for this alternative product are also different.
if 'ALIGNED-biob-prod-dummy-alt' in bw.databases:
    del bw.databases['ALIGNED-biob-prod-dummy-alt']

# create a copy of the foreground database
db_alt = bw.Database('ALIGNED-biob-prod-dummy').copy('ALIGNED-biob-prod-dummy-alt')
# Now modify this database so that more product is used in the use stage.
use_alt = db_alt.get('f9eabf64-b899-40c0-9f9f-2009dbb0a0b2')
exc_alt = list(use_alt.exchanges())[1] # select the second exchange in the activity, i.e. the input from manufacturing
print('original: ', exc_alt) # checking the original
exc_alt['amount'] = 60 # increase the amount of manufactured product needed in the use stage
exc_alt['minimum'] = 60*0.8 # some uncertainty, lower bound of the uniform distribution
exc_alt['maximum'] = 60*1.2 # some uncertainty, upper bound of the uniform distribution
exc_alt.save() # important, or the changes won't be kept
print('alternative: ', exc_alt) # checking the alternative
original:  Exchange: 50.0 kilogram 'Biobased-product-manufacturing' (kilogram, None, None) to 'Biobased-product-use' (year, None, None)>
alternative:  Exchange: 60 kilogram 'Biobased-product-manufacturing' (kilogram, None, None) to 'Biobased-product-use' (year, None, None)>
We can see that the uncertainty range is different for the two products.
# check that both amount and uncertainty values are different
print(list(use.exchanges())[1]._data, "\n---\n", list(use_alt.exchanges())[1]._data)
{'input': ('ALIGNED-biob-prod-dummy', 'a37d149a-6508-4563-8af6-e5a39b4176df'), 'amount': 50.0, 'unit': 'kilogram', 'type': 'technosphere', 'uncertainty type': 4, 'minimum': 37.5, 'maximum': 62.5, 'Notes': 'Input of manufacturing', 'Simapro name': 'Biobased-product-manufacturing', 'Simapro unit': 'kg', 'output': ('ALIGNED-biob-prod-dummy', 'f9eabf64-b899-40c0-9f9f-2009dbb0a0b2')}
---
{'input': ('ALIGNED-biob-prod-dummy-alt', 'a37d149a-6508-4563-8af6-e5a39b4176df'), 'amount': 60, 'unit': 'kilogram', 'type': 'technosphere', 'uncertainty type': 4, 'minimum': 48.0, 'maximum': 72.0, 'Notes': 'Input of manufacturing', 'Simapro name': 'Biobased-product-manufacturing', 'Simapro unit': 'kg', 'output': ('ALIGNED-biob-prod-dummy-alt', 'f9eabf64-b899-40c0-9f9f-2009dbb0a0b2')}
The static impact score is also different.
# check the static value is different
lca = bw.LCA({use_alt: 1}, mymethod)
lca.lci()
lca.lcia()
lca.score # higher impact as expected
135.798938697616
Now we merge the base product and its alternative into the same technology matrix and proceed with the comparative stochastic analysis.
# Create a database with both activities to have a common technology matrix.
cmc_db = bw.Database('cmc') # cmc = comparative Monte Carlo
# A wrapper database with a single activity that takes one unit of each alternative as input
cmc_db.write({
('cmc', 'cmc-act'): {
'name': 'comparative monte carlo activity',
'unit': 'p',
'exchanges': [{
'input': ('ALIGNED-biob-prod-dummy', 'f9eabf64-b899-40c0-9f9f-2009dbb0a0b2'), # first alternative
'amount': 1,
'type': 'technosphere'},
{
'input': ('ALIGNED-biob-prod-dummy-alt', 'f9eabf64-b899-40c0-9f9f-2009dbb0a0b2'), # second alternative
'amount': 1,
'type': 'technosphere'},
{
'input': ('cmc', 'cmc-act'),
'amount': 1,
'type': 'production'}]}})
# select the foreground activity and calculate result
#lca = bw.LCA({cmc_db.get('cmc-act'): 1}, mymethod)
#lca.lci()
#lca.lcia()
# Initialize the Monte Carlo simulation on the wrapper activity
mc = bw.MonteCarloLCA({cmc_db.get('cmc-act'): 1}, mymethod) # both alternatives are now in the same technology matrix
next(mc)
424.94768383053196
(Technical note)¶
The steps above were necessary because in this case we started by importing a single product system. To analyse an alternative product we first had to create a copy of the existing database, modify it to obtain the alternative activity, and then build a common technology matrix.
All of this can be skipped if the alternatives to be compared are two distinct activities in the same foreground database, prepared together in the data template and imported at the same time, as sketched below.
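For illustration only, a hypothetical sketch of that simpler situation; the database name 'my-foreground-db' and the activity codes 'product-A' and 'product-B' are placeholders, not part of this project.
# hypothetical: both alternatives already live in the same foreground database,
# so they share one technology matrix and no wrapper database is needed
# demands = [{('my-foreground-db', 'product-A'): 1},
#            {('my-foreground-db', 'product-B'): 1}]
# mc = bw.MonteCarloLCA(demands[0], mymethod)  # initialise on either alternative
# then iterate with next(mc) and mc.redo_lcia(...) exactly as shown below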
Now, finally, the comparative stochastic simulation¶
# a list of the alternatives to be compared
demands = [{('ALIGNED-biob-prod-dummy', 'f9eabf64-b899-40c0-9f9f-2009dbb0a0b2'):1},
{('ALIGNED-biob-prod-dummy-alt', 'f9eabf64-b899-40c0-9f9f-2009dbb0a0b2'):1}] # the alternative!
# look at this first
mc.redo_lcia(demands[0]) # use
print(mc.score)
mc.redo_lcia(demands[1]) # alternative use; the same technology matrix "A" as before is used to calculate the results
print(mc.score)
mc.redo_lcia(demands[0]) # use again, same result. Note how "redo_lcia" allows doing dependent sampling
print(mc.score)
169.0173497706422
255.9303340598896
169.0173497706422
# Now for several iterations
iterations = 100
simulations = [] # empty list that will contain the results of all iterations, for all alternatives

for _ in range(iterations):
    #print(_)
    next(mc)
    mcresults = [] # empty list that will contain the results of one iteration, for the two alternatives
    for i in demands:
        mc.redo_lcia(i)
        mcresults.append(mc.score)
    simulations.append(mcresults) # append the iteration results to the overall list
What we obtain is two distributions, one for the base product and one for the alternative. Each sample (iteration, i.e. row in the table below) is paired.
# let's give a look
df = pd.DataFrame(simulations, columns = ['use','use_alt']) # df is for "dataframe"
#df.to_csv('ComparativeMCsimulation.csv') # to save it
df
| | use | use_alt |
|---|---|---|
| 0 | 141.678520 | 268.721972 |
| 1 | 97.829031 | 140.486623 |
| 2 | 78.832960 | 158.403273 |
| 3 | 100.999256 | 89.248981 |
| 4 | 90.557471 | 104.953418 |
| ... | ... | ... |
| 95 | 109.358383 | 177.270515 |
| 96 | 150.502508 | 180.014783 |
| 97 | 328.422314 | 280.353572 |
| 98 | 167.139210 | 154.822247 |
| 99 | 160.611146 | 189.419131 |
100 rows × 2 columns
A summary of these distributions shows that the median (50th percentile) impact score of the alternative (use_alt) is higher than that of the original activity (use).
The summary also quantifies the uncertainty as a standard deviation (std), which is large in this case (about half the value of the median).
df.describe()
| | use | use_alt |
|---|---|---|
| count | 100.000000 | 100.000000 |
| mean | 151.671161 | 171.706618 |
| std | 61.587156 | 66.304910 |
| min | 66.681924 | 75.105630 |
| 25% | 109.333769 | 119.021908 |
| 50% | 138.413616 | 158.396103 |
| 75% | 172.730066 | 211.832499 |
| max | 367.724436 | 404.780040 |
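To make the statement about the size of the uncertainty concrete, the spread can also be related to the central value directly; an optional check on the same dataframe:
# standard deviation as a share of the median, for both alternatives
df.std() / df.median()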
The higher impact of the alternative is also confirmed by visual inspection: the distribution of the alternative (use_alt) is shifted upwards compared to the original (use). The relation between the two paired samples is examined in the scatter plot further below.
df.plot(kind = 'box')
#df.T.melt()
<Axes: >
Plotting one set of scores against the other shows a clear trend, as expected: the results are paired, so each point "depends" on the same background conditions.
# plot one against the other to see if there is any trend
plt.plot(df['use'], df['use_alt'], 'o')
plt.xlabel('use - kg CO2-eq')
plt.ylabel('use_alt - kg CO2-eq')
Text(0, 0.5, 'use_alt - kg CO2-eq')
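As an optional check, the visual trend can be quantified with a correlation coefficient between the paired scores; a high correlation here simply reflects that both alternatives share the same background samples.
# correlation between the paired scores (complements the scatter plot above)
r, r_p = stats.pearsonr(df['use'], df['use_alt'])
print(r, r_p)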
Testing for differences¶
This type of simulation makes it possible to calculate differences and ratios between paired results. These tell how many times one product performed better than the other across the entire simulation, and how large the difference was.
These indicators help to understand whether the observed differences are sufficiently large, in light of the uncertainties, to conclude confidently that one product is better than the other, all other conditions being equal.
# Count how many times the difference is positive. This is what SimaPro also does
df['diffe'] = df['use'] - df['use_alt']

# Ratio of the difference relative to each alternative
df['rd_use'] = df['diffe'] / df['use']
df['rd_use_alt'] = df['diffe'] / df['use_alt']
df.describe()
| | use | use_alt | diffe | rd_use | rd_use_alt |
|---|---|---|---|---|---|
| count | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 |
| mean | 151.671161 | 171.706618 | -20.035457 | -0.168291 | -0.088051 |
| std | 61.587156 | 66.304910 | 47.898244 | 0.303413 | 0.233262 |
| min | 66.681924 | 75.105630 | -223.629297 | -1.234493 | -0.552471 |
| 25% | 109.333769 | 119.021908 | -38.089754 | -0.315365 | -0.239753 |
| 50% | 138.413616 | 158.396103 | -18.023101 | -0.149410 | -0.129988 |
| 75% | 172.730066 | 211.832499 | 5.966469 | 0.057954 | 0.061522 |
| max | 367.724436 | 404.780040 | 141.013365 | 0.442612 | 0.794083 |
In this specific case one can see that out of 100 iterations the alternative is worse than the base case in about two thirds (63%) of cases, and that the differences are distributed somewhat to the left of zero. The interpretation is that, given the uncertainties, one cannot conclude that the original activity always outperforms the alternative, but the original is most often the better option. It is up to the decision maker to set a performance threshold on which to base a decision (is 63% sufficient to keep the original and discard the alternative?).
# Let's plot and count the differences.
plt.hist(df.diffe.values)
print(len(df.diffe[df.diffe < 0]))
print(len(df.diffe[df.diffe > 0]))
print(len(df.diffe[df.diffe == 0]))
63
37
0
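The same counts can be expressed directly as shares of the iterations, which is the number a decision maker would compare against a chosen threshold; a small optional addition:
# share of iterations in which each product performs better
print((df['diffe'] < 0).mean())  # original better (use < use_alt)
print((df['diffe'] > 0).mean())  # alternative better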
Plotting the ratios shows that the differences between alternatives can be substantial: the alternative can have an impact twice that of the original, and vice versa (both positive and negative ratios are present). The way the ratios are calculated (relative to one alternative or to the other) does not change this conclusion.
plt.hist(df['rd_use'].values)
plt.hist(df['rd_use_alt'].values)
(array([ 6., 13., 26., 17., 23., 10., 1., 2., 1., 1.]), array([-0.55247116, -0.41781572, -0.28316029, -0.14850485, -0.01384941, 0.12080602, 0.25546146, 0.39011689, 0.52477233, 0.65942776, 0.7940832 ]), <BarContainer object of 10 artists>)
Finally, the paired sampling approach makes it possible to perform more formal statistical testing, such as a paired t-test, in order to understand whether the observed differences are significant given the underlying uncertainties.
# Statistical testing (using the stats package)
# I can use a paired t-test
t_value, p_value = stats.ttest_rel(df['use'],df['use_alt'])
t_value, p_value # very low p-value, i.e. a significant difference
(-4.182920937745814, 6.229363705725887e-05)
# Checking for normality
plt.hist(df['use'].values)
plt.xlabel('use - kg CO2-eq')
SW_value, SW_p_value = stats.shapiro(df['use'])
print(SW_p_value) # Not normally distributed...
plt.hist(df['use_alt'].values)
SW_value, SW_p_value = stats.shapiro(df['use_alt'])
print(SW_p_value) # Not normally distributed...
1.701219841809199e-07
0.00031186424313082826
# Using a non-parametric test: the Wilcoxon signed-rank test
s_value, p_value = stats.wilcoxon(df['use'], df['use_alt'])
s_value, p_value # Significant difference at p < 0.05
(1314.0, 3.129786912845514e-05)
Summing up, the statistical tests show a significant difference between the two distributions, which means that even with these uncertainties we can conclude with high confidence that one alternative is different from the other.
From the analysis of differences and trends we can then see how large this difference is (in roughly two out of three iterations the original outperforms the alternative).
The decision maker is now able to make an informed choice based on these results and a clear understanding of the uncertainties.
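As a possible closing step, the key numbers supporting that decision can be collected in one place; this optional recap assumes the objects from the previous cells (df and the Wilcoxon p_value) are still in memory.
# compact recap of the comparison, using results computed above
print('Share of iterations where the original performs better:', (df['diffe'] < 0).mean())
print('Median difference (use - use_alt):', df['diffe'].median())
print('Wilcoxon signed-rank p-value:', p_value)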