Source code for sbutils.LoadData

import os

import pandas as pd


[docs]class LoadData(object): """ This class contain a set of methods to load data from the csv files The main one is create_df_tx which load all the transactions from the csv files """ def __init__(self, path_to_transaction_folder): """ Initialize the class with the path to the folder containing the csv files as exported by Flipside package It should contain a folder for each chain (ethereum, arbitrum, polygon, etc.) and inside are the csv files name as eoa_tx.csv Parameters ---------- path_to_transaction_folder : str The path to the folder containing the csv files """ self.path_to_tx_dir = path_to_transaction_folder
[docs] def create_df_all_transactions(self, files, tx_chain, n_files=-1): """ Create a dataframe with all transactions from a given chain. Parameters ---------- files : list A list of files to load tx_chain : str The chain to load the transactions from. For example "ethereum" n_files : int The number of files to load. If -1, all files are loaded Returns ------- df : pd.DataFrame A dataframe with all transactions from the given chain """ if n_files == -1: df_list = [self.load_df_transactions(files[f_i], tx_chain) for f_i in range(len(files))] else: df_list = [self.load_df_transactions(files[f_i], tx_chain) for f_i in range(n_files)] df = pd.concat(df_list) return df
[docs] @staticmethod def get_files(path, chain): """ Get the list of files in a given path and chain Parameters ---------- path : str The path to the folder containing the network folder chain : str The name of the chain to list the files of Returns ------- files : list A list of files in the given path and chain """ path_to_csv = os.path.join(path, chain) files = os.listdir(path_to_csv) return files
[docs] def get_files_in_address(self, chain, address_list): """ Get the list of files in a given path and chain and that are in the address list Parameters ---------- chain : str The name of the chain to list the files of address_list : list A list of addresses to filter the contributors Returns ------- files : list A list of files in the given path and chain and filtered by the address list """ all_files = self.get_files(self.path_to_tx_dir, chain) files = [file for file in all_files if self.get_address_name(file) in address_list] return files
[docs] def create_df_tx(self, tx_chain, address_list=None, n_files=-1): """ Create a dataframe with all transactions from a given chain. You can provide a list of addresses to filter the contributors. For example if you want to study the sybil of a given grant or project Parameters ---------- tx_chain : str The chain to study. For example "ethereum" address_list : list A list of addresses to filter the contributors n_files : int The number of files to load. If -1, all files are loaded Returns ------- df : pd.DataFrame A dataframe with all transactions from the given chain """ if address_list is None: files = self.get_files(self.path_to_tx_dir, tx_chain) else: files = self.get_files_in_address(tx_chain, address_list) if n_files == -1: df_list = [self.load_df_tx(files[f_i], tx_chain) for f_i in range(len(files))] else: df_list = [self.load_df_tx(files[f_i], tx_chain) for f_i in range(n_files)] df = pd.concat(df_list) return df
[docs] def load_df_tx(self, file_name, tx_chain): """ Load a dataframe with all transactions from a given file name and chain Parameters ---------- file_name : str The name of the file to load tx_chain : str The chain to load the transactions from. For example "ethereum" Returns ------- df : pd.DataFrame A dataframe with all transactions from the given file name and chain Adds a column with the EOA name """ path_dir = os.path.join(self.path_to_tx_dir, tx_chain) full_path = os.path.join(path_dir, file_name) try: df = pd.read_csv(full_path) except Exception as e: print(e) print("Error reading file: {}".format(full_path)) raise if not "EOA" in df.columns.values: df["EOA"] = self.get_address_name(file_name) return df
[docs] @staticmethod def get_address_name(file_name): """return the address name from the file name""" return file_name.split('_')[0]