Skip to content

Commit

Permalink
add third-party data
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiaoyu-SZ committed May 28, 2024
1 parent c1dac89 commit 6f2b8d3
Show file tree
Hide file tree
Showing 3 changed files with 2,566 additions and 40 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ For example:
```python
MODEL_NAME = 'gpt-3.5-turbo'
CONTAIN_USER_PROFILE = False
CONTAIN_SHOT = 'None' # All or Type or None
CONTAIN_SHOT = 'None' # Type or None
PERSONALIZED = '_personalized' if CONTAIN_USER_PROFILE else ''
TEMPEARTURE = 0
```
Expand All @@ -55,7 +55,7 @@ Then run `annot_vllm.py` or `annot_vllm_single.py`

### Calculate the metrics

Run `corr.py`, it will calculate the correlations between the annotations and the ground truth for all files in `./data/`
Run `corr.py` to calculate the correlations between the annotations and the ground truth for the files in `./output/`.

The output contains the Pearson, Spearman and Kendall correlations, each reported at the Dataset level, User level and Item level.

Expand All @@ -67,10 +67,11 @@ The output is in DataFrame format. The columns are:
The `llm_value` is the value predicted by LLM, and the others are from the data.

## Dataset Information
The real user labels and self-explanations come from the paper "User Perception of Recommendation Explanation: Are Your Explanations What Users Need?"; see `./output/df_explanation.pkl`.

The dataset is from the paper "User Perception of Recommendation Explanation: Are Your Explanations What Users Need?".
We additionally collect third-party annotations for the explanatory texts; see `./output/third_party_annotation.csv`.

If you use this dataset, please cite the paper:
If you use the data in `./output/df_explanation.pkl`, please cite the paper:

```bibtex
@article{UserPerceptionTois2023,
Expand Down
60 changes: 24 additions & 36 deletions corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,56 +24,44 @@ def calculate_correlation(data_df, metric, METRIC):
print(FILE_NAME)
df = pd.read_csv(FILE_NAME, sep='\t')
df['user_value'].fillna(3, inplace=True)
df['llm_value'].fillna(0, inplace=True)
results = {}
for metric in ['persuasiveness', 'transparency', 'accuracy', 'satisfactory']:
data_df = df[df['metric'] == metric]
user_values = data_df['user_value']
llm_values = data_df['llm_value']
index = len(user_values)
print(index)
pearsonr_correlation = np.corrcoef(
user_values[:index], llm_values[:index])[0, 1]
pearson_correlation = pearsonr(
user_values[:index], llm_values[:index])[0]
spearmanr_correlation = spearmanr(
user_values[:index], llm_values[:index])[0]
kendalltau_correlation = kendalltau(
user_values[:index], llm_values[:index])[0]
pearson_correlation = pearsonr(user_values[:index], llm_values[:index])[0]
spearmanr_correlation = spearmanr(user_values[:index], llm_values[:index])[0]
kendalltau_correlation = kendalltau(user_values[:index], llm_values[:index])[0]
# mae = np.mean(np.abs(user_values -all_three_array))
# rmse = np.sqrt(np.mean((user_values - all_three_array)**2))

print(f"Metrics for {metric}, Dataset-Level:")
print(f" Scipy Pearson correlation coefficient: {
pearson_correlation}")
# print(f" Pearson correlation coefficient: {correlation_coefficient}")
print(f" Scipy Pearson correlation coefficient: {pearson_correlation}")
print(f" Spearman correlation coefficient: {spearmanr_correlation}")
print(f" Kendall correlation coefficient: {kendalltau_correlation}")

user_correlation = data_df.groupby(['user']).apply(
calculate_correlation, 'user_value', 'llm_value').reset_index()


user_correlation = data_df.groupby(['user']).apply(calculate_correlation,'user_value','llm_value').reset_index()

print(f"Metrics for {metric}, User-Level:")
print(f" META Pearson correlation coefficient: {
user_correlation['pearsonr_correlation'].mean()}")
print(f" META Spearman correlation coefficient: {
user_correlation['spearmanr_correlation'].mean()}")
print(f" META Kendall correlation coefficient: {
user_correlation['kendalltau_correlation'].mean()}")

pair_correlation = data_df.groupby(['user', 'movie_id']).apply(
calculate_correlation, 'user_value', 'llm_value').reset_index()
print(f" META Pearson correlation coefficient: {user_correlation['pearsonr_correlation'].mean()}")
print(f" META Spearman correlation coefficient: {user_correlation['spearmanr_correlation'].mean()}")
print(f" META Kendall correlation coefficient: {user_correlation['kendalltau_correlation'].mean()}")

pair_correlation = data_df.groupby(['user','movie_id']).apply(calculate_correlation,'user_value','llm_value').reset_index()

print(f"Metrics for {metric}, Sample-Level:")
print(f" META Pearson correlation coefficient: {
pair_correlation['pearsonr_correlation'].mean()}")
print(f" META Spearman correlation coefficient: {
pair_correlation['spearmanr_correlation'].mean()}")
print(f" META Kendall correlation coefficient: {
pair_correlation['kendalltau_correlation'].mean()}")

results[metric] = [f"& {100*pearson_correlation:.2f} & {100*spearmanr_correlation:.2f} & {100*kendalltau_correlation:.2f}",
f"& {100*user_correlation['pearsonr_correlation'].mean():.2f} & {100*user_correlation['spearmanr_correlation'].mean():.2f} & {
100*user_correlation['kendalltau_correlation'].mean():.2f}",
f"& {100*pair_correlation['pearsonr_correlation'].mean():.2f} & {100*pair_correlation['spearmanr_correlation'].mean():.2f} & {
100*pair_correlation['kendalltau_correlation'].mean():.2f}"
]
print(f" META Pearson correlation coefficient: {pair_correlation['pearsonr_correlation'].mean()}")
print(f" META Spearman correlation coefficient: {pair_correlation['spearmanr_correlation'].mean()}")
print(f" META Kendall correlation coefficient: {pair_correlation['kendalltau_correlation'].mean()}")

results[metric] = {'dataset':[100*pearson_correlation,100*spearmanr_correlation,100*kendalltau_correlation],
'user':[100*user_correlation['pearsonr_correlation'].mean(),100*user_correlation['spearmanr_correlation'].mean(),100*user_correlation['kendalltau_correlation'].mean()],
'pair':[100*pair_correlation['pearsonr_correlation'].mean(),100*pair_correlation['spearmanr_correlation'].mean(),100*pair_correlation['kendalltau_correlation'].mean()]
}

for i in range(3):
output = ''
Expand Down
Loading

0 comments on commit 6f2b8d3

Please sign in to comment.