Skip to content

Commit

Permalink
Comparisons & Assessments done.
Browse files Browse the repository at this point in the history
Next is UserModels
  • Loading branch information
lenhoanglnh committed Feb 9, 2025
1 parent 4de62d4 commit 403250e
Show file tree
Hide file tree
Showing 20 changed files with 1,938 additions and 1,893 deletions.
7 changes: 3 additions & 4 deletions solidago/src/solidago/modules/aggregation/average.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,16 @@ def __call__(self,
""" Returns weighted average of user's scores """
global_model = DirectScoring()
multiscores = user_models(entities).reorder_keys(["entity_name", "criterion", "username"])
voting_rights1 = voting_rights.groupby(["entity_name", "criterion"])
voting_rights = voting_rights.groupby(["entity_name", "criterion"])

for entity_name in multiscores.get_set("entity_name"):
for criterion in multiscores[entity_name].get_set("criterion"):

voting_rights2 = voting_rights1[entity_name, criterion].groupby(["username"])
weighted_sum = sum([
score * voting_rights2[username]
score * voting_rights[entity_name, criterion].get(username)
for username, score in multiscores[entity_name, criterion]
], Score(0, 0, 0))
sum_of_weights = voting_rights1[entity_name, criterion]["voting_right"].sum()
sum_of_weights = voting_rights[entity_name, criterion]["voting_right"].sum()
global_model[entity_name, criterion] = weighted_sum / sum_of_weights

return global_model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,14 @@ def user_learn(self,
init_model: BaseModel,
) -> DirectScoring:
""" Learns only based on comparisons """
if self.last_comparison_only:
comparisons = comparisons.last_only()
model = DirectScoring()
compared_entity_names = set(comparisons["left_name"]) | set(comparisons["right_name"])
entities = entities.get(compared_entity_names) # Restrict to compared entities
init = init_model(entities).reorder_keys(["criterion", "entity_name"])
criteria = set(comparisons["criterion"]) | init.get_set("criterion")
for criterion, cmps in comparisons.groupby(["criterion"]).items():
for criterion, cmps in comparisons.groupby(["criterion"]):
criterion_entity_names = set(cmps["left_name"]) | set(cmps["right_name"])
if len(criterion_entity_names) <= 1:
continue
Expand Down Expand Up @@ -177,13 +179,13 @@ def compute_uncertainties(self,
rights: npt.NDArray
rights[i] is the right uncertainty on scores[i]
"""
indices = comparisons.compared_entity_indices(entity_name2index, self.last_comparison_only)
indices = comparisons.compared_entity_indices(entity_name2index)
if not indices["left"]:
inf_array = np.array([ float("inf") for _ in entity_name2index ])
return inf_array, inf_array
indices = { loc: np.array(indices[loc]) for loc in ("left", "right") }
score_diffs = scores[indices["left"]] - scores[indices["right"]]
normalized_comparisons = comparisons.normalized_comparisons(self.last_comparison_only)
normalized_comparisons = comparisons.normalized_comparisons()
score_negative_log_likelihood = self.negative_log_likelihood(score_diffs, normalized_comparisons)

kwargs = dict(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ def negative_log_posterior(self,
comparisons: Comparisons,
) -> torch.Tensor:
""" Negative log posterior """
indices = comparisons.compared_entity_indices(entity_name2index, self.last_comparison_only)
indices = comparisons.compared_entity_indices(entity_name2index)
score_diffs = scores[indices["left"]] - scores[indices["right"]]
normalized_comparisons = comparisons.normalized_comparisons(self.last_comparison_only)
normalized_comparisons = comparisons.normalized_comparisons()
loss = self.torch_cumulant_generating_function(score_diffs).sum()
loss += (score_diffs * torch.tensor(normalized_comparisons)).sum()
return loss + (scores**2).sum() / (2 * self.prior_std_dev**2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ def __call__(self,
comparisons = comparisons.groupby(["criterion"])
stat_names = ("cumulative_trust", "min_voting_right", "overtrust")
entity_names = {
c: set(d[c]["entity_name"]) | set(d[c]["entity_name"])
for d in (assessments, comparisons)
c: set(assessments[c]["entity_name"]) | set(comparisons[c]["entity_name"])
for c in criteria
}

Expand Down
34 changes: 15 additions & 19 deletions solidago/src/solidago/primitives/datastructure/unnamed_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class UnnamedDataFrame(DataFrame):
row_cls: Optional[type]=None

def __init__(self,
data: Optional[Any]=None,
key_names: Optional[Union[str, list[str]]]=None,
value_names: Optional[Union[str, list[str]]]=None,
name: Optional[str]=None,
Expand All @@ -30,9 +31,9 @@ def to_list(l):
return l
key_names, value_names = to_list(key_names), to_list(value_names)
key_value_columns = key_names + value_names
if isinstance(args[0], list) or ("data" in kwargs and isinstance(kwargs["data"], list)):
if isinstance(data, list):
kwargs["columns"] = kwargs["columns"] if "columns" in kwargs else key_value_columns
super().__init__(*args, **kwargs)
super().__init__(data=data, *args, **kwargs)
self.meta = SimpleNamespace()
self.meta.name = name
self.meta.key_names, self.meta.value_names = key_names, value_names
Expand Down Expand Up @@ -136,33 +137,25 @@ def load(cls, filename: str) -> "UnnamedDataFrame":

def last_only(self) -> "UnnamedDataFrame":
return type(self)(
data=[ row for _, row in self.iter(process=False, last_only=True) ],
data=DataFrame([ df.iloc[-1] for _, df in self.iter(process=False, last_only=True) ]),
key_names=self.key_names,
value_names=self.value_names,
name=self.meta.name,
default_value=self.meta._default_value,
last_only=self.meta._last_only,
last_only=True
)

def groupby(self, columns: Optional[list[str]]=None, process: bool=True) -> "UnnamedDataFrameDict":
from solidago.primitives.datastructure import UnnamedDataFrameDict
columns = columns if columns else self.key_names
data = { key: value for key, value in self.iter(columns, process) }
return UnnamedDataFrameDict(data, df_cls=type(self))
sub_key_names = [ key for key in self.key_names if key not in columns ]
return UnnamedDataFrameDict(data, df_cls=type(self), main_key_names=columns, sub_key_names=sub_key_names)

def iter(self,
columns: Optional[list[str]]=None,
process: bool=True,
last_only: Optional[bool]=None
) -> Iterable:
columns = columns if columns else self.key_names
last_only = self.meta._last_only if last_only is None else last_only
if columns is None:
for _, row in self.iterrows():
if process:
yield self.row2key(row), self.row2value(row)
else:
yield row
return None
columns = self.key_names if columns is None else columns
if not columns:
yield list(), self.df2value(self, last_only) if process else self
return None
Expand All @@ -171,11 +164,14 @@ def iter(self,
for key in list(groups.groups.keys()):
key_tuple = key if isinstance(key, tuple) else (key,)
df = groups.get_group(key_tuple)
v = type(self)(df, key_names=kn) if kn or not process else self.df2value(df, last_only)
yield key, v
if len(kn) > 0 or not process:
df = DataFrame([df.iloc[-1]]) if last_only and len(kn) == 0 else df
yield key, type(self)(df, key_names=kn)
else:
yield key, self.df2value(df, last_only)

def __iter__(self, process: bool=True) -> Iterable:
return self.iter(process=process)
return self.iter(self.key_names, process=process)

def keys(self, columns: Optional[list[str]]=None) -> list:
    """ Lists the keys produced by `self.iter` for the given columns.

    Parameters
    ----------
    columns: Optional[list[str]]
        Forwarded unchanged to `self.iter` as its `columns` argument.

    Returns
    -------
    list
        The first component of every (key, value) pair yielded by
        `self.iter(columns=columns, process=True)`, in iteration order.
    """
    key_value_pairs = self.iter(columns=columns, process=True)
    return [ key for key, _value in key_value_pairs ]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from typing import Union, Optional, Callable, Any, Iterable
from types import BuiltinFunctionType, SimpleNamespace
from pathlib import Path
from pandas import DataFrame, Series
from functools import reduce

import pandas as pd


class UnnamedDataFrameDict:
    """ Thin wrapper around a dict whose values are dataframes of class `df_cls`.

    Lookups stringify the requested key, and a missing key yields a freshly
    constructed empty `df_cls()` rather than raising `KeyError`.
    """

    def __init__(self,
        *args,
        df_cls: type=DataFrame,
        main_key_names: Optional[list[str]]=None,
        sub_key_names: Optional[list[str]]=None,
        **kwargs
    ):
        """
        Parameters
        ----------
        *args, **kwargs:
            Forwarded to `dict(...)` to build the underlying mapping.
        df_cls: type
            Class instantiated (without arguments) when a missing key is requested.
        main_key_names: Optional[list[str]]
            Names stored as `self.main_key_names`; defaults to an empty list.
        sub_key_names: Optional[list[str]]
            Names stored as `self.sub_key_names`; defaults to an empty list.
        """
        self.dict = dict(*args, **kwargs)
        self.df_cls = df_cls
        # The defaults used to be the type object `list[str]` itself (a type
        # hint written in default-value position); fall back to real lists.
        self.main_key_names = main_key_names if main_key_names is not None else []
        self.sub_key_names = sub_key_names if sub_key_names is not None else []

    def __getitem__(self, key: Union[Any, tuple[str]]) -> DataFrame:
        """ Returns the value stored under the stringified key, or an empty `df_cls()` if absent """
        # Tuple keys are stringified component-wise, any other key as a whole.
        keys = tuple(str(k) for k in key) if isinstance(key, tuple) else str(key)
        if keys in self.dict:
            return self.dict[keys]
        return self.df_cls()

    def __repr__(self) -> str:
        """ One "key:" header followed by the value's repr per entry, separated by blank lines """
        return "\n\n".join([ f"{key}:\n{value}" for key, value in self.dict.items() ])

    def __iter__(self) -> Iterable:
        """ Yields (keys, value) pairs, unlike a plain dict which yields keys only """
        for keys, value in self.dict.items():
            yield keys, value
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/assessments/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self,
last_only=True,
**kwargs
):
super().__init__(key_names, None, name, None, last_only, data, **kwargs)
super().__init__(data, key_names, None, name, None, last_only, **kwargs)

def get_evaluators(self, entity: Union[str, "Entity"]) -> set[str]:
    """ Returns the set of values in the "username" column of `self.get(entity_name=entity)` """
    entity_rows = self.get(entity_name=entity)
    usernames = entity_rows["username"]
    return set(usernames)
Expand Down
13 changes: 5 additions & 8 deletions solidago/src/solidago/state/comparisons/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self,
last_only=True,
**kwargs
):
super().__init__(key_names, None, name, None, last_only, data, **kwargs)
super().__init__(data, key_names, None, name, None, last_only, **kwargs)

def get_evaluators(self, entity: Union[str, "Entity"]) -> set[str]:
evaluators = set(self.get(left_name=entity)["username"])
Expand Down Expand Up @@ -60,20 +60,17 @@ def order_by_entities(self, other_keys_first: bool=True) -> "Comparisons":

return type(self)(pd.concat([left, right]), key_names=key_names)

def compared_entity_indices(self,
entity_name2index: dict[str, int],
last_only: bool=True,
) -> dict[str, list[int]]:
def compared_entity_indices(self, entity_name2index: dict[str, int]) -> dict[str, list[int]]:
key_indices = { loc: self.key_names.index(f"{loc}_name") for loc in ("left", "right") }
return {
location: [
entity_name2index[keys[key_indices[location]]]
for keys, _ in self.iter(last_only=last_only)
for keys, _ in self.iter(last_only=self.meta._last_only)
] for location in ("left", "right")
}

def normalized_comparisons(self) -> Series:
return Series() if self.empty else self["value"] / self["max"]
def normalized_comparisons(self) -> np.ndarray:
    """ Returns the "value" column divided element-wise by the "max" column.

    Returns
    -------
    np.ndarray
        One entry per row of `self`; an empty array when `self.empty` is true.
    """
    if self.empty:
        # `np.array()` without an argument raises TypeError ("object" is
        # required), so the empty case must pass an explicit empty list.
        return np.array([])
    return np.array(self["value"] / self["max"])

def to_comparison_dict(self,
entities: "Entities",
Expand Down
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/made_public/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self,
last_only=True,
**kwargs
):
super().__init__(key_names, value_name, name, default_value, last_only, data, **kwargs)
super().__init__(data, key_names, value_name, name, default_value, last_only, **kwargs)

def penalty(self, privacy_penalty: float, *args, **kwargs) -> float:
    """ Returns 1 when `self.get(*args, **kwargs)` is truthy, and `privacy_penalty` otherwise.

    Parameters
    ----------
    privacy_penalty: float
        Value returned when the lookup result is falsy.
    *args, **kwargs:
        Forwarded unchanged to `self.get`.
    """
    if self.get(*args, **kwargs):
        return 1
    return privacy_penalty
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/voting_rights/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ def __init__(self,
last_only=True,
**kwargs
):
super().__init__(key_names, value_name, name, default_value, last_only, data, **kwargs)
super().__init__(data, key_names, value_name, name, default_value, last_only, **kwargs)
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/vouches/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ def __init__(self,
last_only=True,
**kwargs
):
super().__init__(key_names, value_names, name, default_value, last_only, data, **kwargs)
super().__init__(data, key_names, value_names, name, default_value, last_only, **kwargs)

Loading

0 comments on commit 403250e

Please sign in to comment.