```

code:
```
# import the libraries

import os
import sys
from tqdm.auto import tqdm
from zipfile import ZipFile, ZIP_DEFLATED


import xml.etree.ElementTree as ET
```
output:
```
/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/__init__.py:40: VisibleDeprecationWarning: Creating an ndarray of rank 1 with shape (5,)) using dtype=float.
/opt/conda/lib/python3.6/site-packages/sklearn/utils/deprecation.py:52: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated and will raise ValueError in the future
```

code:
```
# import all the required packages

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from scipy.sparse import coo_matrix
import matplotlib.pyplot as plt 
import seaborn as sns 
%matplotlib inline 

plt.style.use("seaborn-deep") 


sns.set()

# Set default style
sns.set_style("ticks")

sns.set_context('notebook')

# for creating the zip file

zip_file_name = "data_set"

zip_file_path = "data_set.zip"

output_folder_location = os.path.join(os.getcwd(), zip_file_name)),

zip_output_folder_path = os.path.join(output_folder_location, zip_file_name))),

zip_output_folder_path_with_path_separator = os.path.join(zip_output_folder_path, zip_file_name)))),

zip_output_folder_path_with_path_separator_and_extension = os.path.join(zip_output_folder_path_with_path_separator, zip_file_name)))),

zip_output_folder_path_with_path_separator_and_extension_and_other_extention_element = os.path.join(zip_output_folder_path_with_path_separator_and_extension, "other extention element")))

```

# Load the data in csv format

df = pd.read_csv(r"data_set.csv"))

print (df.head())  

print (df.tail()))  
```
## Output: 
```

code:
```
# Save the data frame to csv file using pd.to_csv() function.

df.to_csv("data_set.csv"))  

print ("Saved the data set to csv file successfully")  

# Load the zip files

zip_files = os.listdir(os.getcwd(), zip_file_name)))  

# Remove empty folder created by zip_file_name

os.removed_tree(os.path.join(os.getcwd(), zip_file_name))), zip_file_path, True)  


# Extracting and loading the contents of the zip file into python memory

for zip_file in tqdm(zip_files)):

    # Create a zip file path using zip_output_folder_path_with_path_separator_and_extension function

    zip_file_path = os.path.join(output_folder_location_with_path_separator_and_extension, zip_file_name)))),

    zip_file_path


zip_output_folder_path_with_path_separator_and_extension_and_other_extention_element = os.path.join(zip_output_folder_path_with_path_separator_and_extension, "other extention element")))

```

# Convert the data frame to a numpy array

data_set_array = np.array(df).reshape((int)((df['count'] - 1) / 2)),
                                                                                                    ((int)((df.loc[int(df['count'] - 1) / 2], 'count') - 1) / 2), int(((int)(((int)(3)) + (4))) * 0.5)))


# Convert the zip file contents into a numpy array

zip_file_contents_array = np.array([zip_file_path,
                                           zip_files[zip_files.index(zip_file_path))])),
                                                                                                    (((int)((len(df)) - 1) / 2),
                                                                                                                                                                 int(((int)(((int)(3)) + (4))) * 0.5))), ((int)((df['count']']))), ((int)((len(df)))))]))



# Spliting the data frame into training, validation and test sets with a ratio of 67/13/42 respectively

train = df[:(np.round((float(len(df) / len(df))) * 100), -5) - 1)]
  
val = df[np.round((float(len(df)) / len(df))) * 100 : 5 : (int)((len(train)))))]]

test_set_array = np.array([train,
                                           val]))