Implemented service to parse files #42

Merged 2 commits on May 18, 2024
service/sybil_finder.py: 73 changes (47 additions, 26 deletions)
@@ -96,45 +96,66 @@ def find_common_items(file1, file2):
         return f"An error occurred: {str(e)}"
 
 
-def filter_addresses(db_path, file1, file2, output_file, output_table):
-    # Connect to the SQLite database
-    conn = sqlite3.connect(db_path)
-    cursor = conn.cursor()
+def filter_addresses(db_path='data/dune_data.db', file1='data/sybil.txt', file2='data/not_sybil.txt', output_file='data/result.txt',
+                     output_db='data/result.db', output_table='result'):
+    # Ensure the data directory exists
+    os.makedirs(os.path.dirname(output_db), exist_ok=True)
+
+    # Connect to the source SQLite database
+    source_conn = sqlite3.connect(db_path)
+    source_cursor = source_conn.cursor()
+
+    # Connect to the target SQLite database
+    target_conn = sqlite3.connect(output_db)
+    target_cursor = target_conn.cursor()
 
     try:
-        # Read the file contents into sets
+        # Read file contents into sets
         with open(file1, 'r', encoding='utf-8') as f1:
             items1 = set(f1.read().splitlines())
 
         with open(file2, 'r', encoding='utf-8') as f2:
             items2 = set(f2.read().splitlines())
 
-        # Intersection of both files to find common addresses
-        common_addresses = items1.intersection(items2)
+        # Union of both files to find excluded addresses
+        excluded_addresses = items1.union(items2)
 
-        # Query the database for entries with these addresses
-        query = "SELECT * FROM dune_items WHERE ua IN ({})".format(
-            ','.join('?' for _ in common_addresses))
-        cursor.execute(query, list(common_addresses))
-        results = cursor.fetchall()
+        # Function to split a list into chunks
+        def chunks(lst, n):
+            for i in range(0, len(lst), n):
+                yield lst[i:i + n]
 
-        # Write the addresses to the output file
-        with open(output_file, 'w', encoding='utf-8') as f:
-            for result in results:
-                f.write(result[0] + '\n')
+        # Prepare the result set
+        results = []
 
+        # Execute queries in chunks
+        for chunk in chunks(list(excluded_addresses), 999):
+            query = "SELECT * FROM dune_items WHERE ua NOT IN ({})".format(
+                ','.join('?' for _ in chunk))
+            source_cursor.execute(query, chunk)
+            results.extend(source_cursor.fetchall())
+
-        # Create the new table with the same structure
-        cursor.execute(f"CREATE TABLE IF NOT EXISTS {output_table} LIKE dune_items")
-        conn.commit()
+        # Create a new table in the target database and insert data
+        target_cursor.execute(f"CREATE TABLE IF NOT EXISTS {output_table} (ua TEXT, tc INTEGER, amt REAL, amt_avg REAL, cc TEXT, dwm TEXT, lzd INTEGER)")
+        target_conn.commit()
 
         # Insert filtered data into the new table
         insert_query = f"INSERT INTO {output_table} VALUES (?,?,?,?,?,?,?)"
-        cursor.executemany(insert_query, results)
-        conn.commit()
+        target_cursor.executemany(insert_query, results)
+        target_conn.commit()
 
-        return "Process completed successfully."
+        # Write only the addresses to a new text file
+        with open(output_file, 'w', encoding='utf-8') as f:
+            for result in results:
+                f.write(result[0] + '\n')
+
+        return "Data filtered and output file created."
     except Exception as e:
         return f"An error occurred: {str(e)}"
     finally:
-        # Close the database connection
-        conn.close()
+        # Close both database connections
+        source_conn.close()
+        target_conn.close()
+
+
+print(filter_addresses())
+
+# print(find_common_items('data/sybil.txt', 'data/not_sybil.txt'))
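
Context for the chunking in the new query loop: SQLite caps the number of bound parameters in a single statement (the historical default for SQLITE_MAX_VARIABLE_NUMBER is 999), so a large address list has to be split before it can be bound into an IN/NOT IN clause. The sketch below is a minimal, self-contained illustration of that pattern, not the PR's code: the table and column names (dune_items, ua, tc) mirror the diff, but the helper select_matching, the in-memory database, and the sample rows are assumptions for demonstration. The PR's loop applies NOT IN per chunk; the sketch uses the positive IN form of the same chunking pattern to keep the demo small.

import sqlite3

# Yield successive n-sized slices of lst.
def chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

# Collect rows whose `ua` value appears in `addresses`, binding at most
# `chunk_size` parameters per statement to stay under SQLite's limit.
def select_matching(conn, addresses, chunk_size=999):
    cursor = conn.cursor()
    rows = []
    for chunk in chunks(list(addresses), chunk_size):
        placeholders = ','.join('?' for _ in chunk)
        query = f"SELECT * FROM dune_items WHERE ua IN ({placeholders})"
        cursor.execute(query, chunk)
        rows.extend(cursor.fetchall())
    return rows

if __name__ == "__main__":
    # Throwaway in-memory database with the same table/column names as the diff.
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE dune_items (ua TEXT, tc INTEGER)")
    conn.executemany("INSERT INTO dune_items VALUES (?, ?)",
                     [(f"0x{i:04x}", i) for i in range(3000)])
    wanted = [f"0x{i:04x}" for i in range(0, 3000, 2)]  # 1500 addresses, spans two chunks
    print(len(select_matching(conn, wanted)))  # expect 1500
    conn.close()

The chunk size of 999 matches SQLite's long-standing default parameter limit; newer builds allow far more, but 999 stays safe across versions.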