\documentclass[]{standalone}
\input{config-gfx}

\begin{document}
% Override the pgfplots default axis size for the figure below.
% \renewcommand* is used because pgfplots already defines both macros.
\renewcommand*{\axisdefaultwidth}{10cm}
\renewcommand*{\axisdefaultheight}{7cm}
% Load the comma-separated measurement file (first row = column names)
% into \outputsTable; the plots below pick their columns from it.
\pgfplotstableread[
  col sep=comma,
  header=true,
  read completely=true,
]{outputs_runtime_parallel_strong.csv}\outputsTable
\begin{tikzpicture}
  % Run time per inference step over the number of threads, read from
  % \outputsTable. Every measured curve gets a \label so that the \matrix
  % legends drawn after the axis can reproduce its line style via \ref.
  \begin{axis}[
      grid=both,
      table/x=num-threads,   % x values come from this table column
      xmode=linear,
      ymode=log,
      xlabel={Number of Cores/Number of Threads (\num{32768} Inputs Total)},
      ylabel={Run Time per Inference Step},
      y unit=\si{\second},
      xmin=1,
      xmax=48,
      ymin=0.00008,
      ymax=0.06,
      mark=none,
      % The legend keys only take effect once an \addlegendentry is active;
      % all of them are currently commented out in favour of the \ref-based
      % legends outside the axis.
      legend pos=south west,
      legend cell align=left,
      legend style={
        font=\tiny,
        xshift=-0.5ex,
        yshift=-0.3ex,
      },
    ]
    % Disabled alternative: loop over the gcc builds with automatic styles.
    % \foreach \c [count=\x from 0] in { subgridles-gcc-open-cpp,subgridles-gcc-open-fortran,subgridles-gcc-open-libtorch,subgridles-gcc-open-pytorch,subgridles-gcc-mkl-cpp,subgridles-gcc-mkl-fortran}
    %     {\addplot+ table[y expr=\thisrow{\c}] {\outputsTable};\addlegendentry{\c{}};}

    % Measured curves (plot styles such as `pytorch' are defined outside this file).
    \addplot[pytorch] table[y expr=\thisrow{subgridles-intel-mkl-pytorch}] {\outputsTable};\label{plot:pytorch}
    % \addlegendentry{\torchpython};
    \addplot[libtorch] table[y expr=\thisrow{subgridles-intel-open-libtorch}] {\outputsTable};\label{plot:libtorch}
    % \addlegendentry{\torchcpp};
    \addplot[cpp-open] table[y expr=\thisrow{subgridles-intel-open-cpp}] {\outputsTable};\label{plot:opencpp}
    % \addlegendentry{\opencpp};
    \addplot[fortran-open] table[y expr=\thisrow{subgridles-intel-open-fortran}] {\outputsTable};\label{plot:openfortran}
    % \addlegendentry{\openfortran};
    \addplot[cpp-mkl] table[y expr=\thisrow{subgridles-intel-mkl-cpp}] {\outputsTable};\label{plot:mklcpp}
    % \addlegendentry{\mklcpp};
    \addplot[fortran-mkl] table[y expr=\thisrow{subgridles-intel-mkl-fortran}] {\outputsTable};\label{plot:mklfortran}
    % \addlegendentry{\mklfortran};

    % Reference curve for ideal scaling: 0.007/x, sampled every 0.5 on [1,48].
    \addplot+ [mark=none,solid,opacity=0.7,magenta,domain=1:48,samples=95] {0.007/x};
    % \addlegendentry{Ideal Scaling};

    % Anchor points for the two legends drawn after the axis. They are given
    % explicitly in `axis cs' so they are data coordinates for every pgfplots
    % compat level (bare coordinates are only read as axis coordinates from
    % compat=1.11 on).
    \coordinate[xshift=10ex] (smalllegendpos) at (axis cs:22,0.012);
    \coordinate[xshift=10ex] (biglegendpos) at (axis cs:38,0.000085);
  \end{axis}
  % Compact legend: groups the plot markers by whether \dalotia{} is used.
  % No anchor is set, so the matrix is placed with its default anchor on
  % (smalllegendpos). `& [5pt]' opens one more (empty) column with 5pt of
  % additional column separation.
  \matrix[
    matrix of nodes,
    nodes={font=\footnotesize},
    inner sep=0.2em,
    fill=white,
    draw,
  ] at (smalllegendpos) {
    \ref{plot:pytorch} \ref{plot:libtorch}
      & without \dalotia{} & [5pt]\\
    \ref{plot:opencpp} \ref{plot:openfortran}
      \ref{plot:mklcpp} \ref{plot:mklfortran}
      & with \dalotia{} & [5pt]\\
  };
  % Detailed legend: one marker/name pair per plotted variant, arranged as
  % two rows of three pairs; its south-east corner sits on (biglegendpos).
  \matrix[
    matrix of nodes,
    nodes={font=\scriptsize},
    inner sep=0.1em,
    fill=white,
    draw,
    anchor=south east,
  ] at (biglegendpos) {
    % row 1
    \ref{plot:pytorch} & \torchpython{} &
    \ref{plot:opencpp} & \opencpp{} &
    \ref{plot:mklcpp} & \mklcpp{} \\
    % row 2
    \ref{plot:libtorch} & \torchcpp{} &
    \ref{plot:openfortran} & \openfortran{} &
    \ref{plot:mklfortran} & \mklfortran{} \\
  };
\end{tikzpicture}
\end{document}
