-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark_pipeline.py
66 lines (50 loc) · 1.7 KB
/
benchmark_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from backend import get_data_for_domain, gpt_competitor_domains
from pdf_report.generate_benchmark_pdf import generate_benchmark_report
from loguru import logger
import pandas as pd
def combined_benchmarking(domain='revolut.com',
                          company_name='Revolut',
                          output_csv='dataset/benchmark_db.csv'):
    """Build a competitor-benchmark dataset for *domain* and render a PDF report.

    Fetches the target company's data, asks GPT for competitor domains,
    fetches each competitor's data, concatenates everything into one
    DataFrame restricted to a curated column set, writes it to *output_csv*,
    and generates a PDF benchmark report from that CSV.

    Args:
        domain: Company domain to benchmark (default: ``'revolut.com'``).
        company_name: Display name passed to the report generator
            (default: ``'Revolut'``).
        output_csv: Path where the benchmark CSV is written
            (default: ``'dataset/benchmark_db.csv'``).

    Returns:
        Path to the generated PDF report, as returned by
        ``generate_benchmark_report``.
    """
    company_df = get_data_for_domain(domain)
    df_list = [company_df]

    # GPT proposes the competitor set; each competitor's data is appended
    # alongside the target company so they share one table.
    benchmark_domains = gpt_competitor_domains(domain)
    for comp_domain in benchmark_domains:
        logger.info(comp_domain)
        df_list.append(get_data_for_domain(comp_domain))

    results_df = pd.concat(df_list, ignore_index=True)

    # Curated, ordered column subset for the report. NOTE(review):
    # "invester_list" looks like a typo for "investor_list", but it must
    # match the upstream column name — confirm against get_data_for_domain.
    fields_sorted = [
        # Company Information
        "display_name",
        "alternative_names",
        "summary",
        "headline",
        "tags",
        "industry",
        # Company Size & Employees
        "size",
        "employee_count",
        "employee_count_by_country",
        # Company Location & Language
        "company_website",
        "country",
        "locality",
        "language",
        # Traffic & Engagement
        "quarter",
        "number_of_visits",
        # Funding & Investment
        "financing_list",
        "total_funding_raised",
        "latest_funding_stage",
        "last_funding_date",
        "number_funding_rounds",
        "funding_stages",
        "invester_list"
    ]
    filtered_df = results_df[fields_sorted]
    logger.info(filtered_df.shape)

    # TODO: Do we want to actually always update this db? Or overwrite
    filtered_df.to_csv(output_csv, index=False)
    logger.info("Successfully created csv")  # logged after the write succeeds

    pdf_path = generate_benchmark_report(output_csv, company_name)
    logger.info("Generated Report")
    return pdf_path