Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 32 additions & 61 deletions src/votekit/pref_profile/pref_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
_validate_score_csv_format,
)
from votekit.pref_profile.utils import (
_sum_rank_profiles,
_sum_score_profiles,
convert_row_to_rank_ballot,
convert_row_to_score_ballot,
)
Expand Down Expand Up @@ -276,6 +278,9 @@ def __str__(self) -> str:
def group_ballots(self) -> Self:
    """
    Not implemented at this level.

    NOTE(review): presumably overridden by the concrete profile classes
    (this diff shows ``group_ballots`` definitions returning RankProfile and
    ScoreProfile) -- confirm against the class hierarchy.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def copy(self) -> Self:
    """
    Not implemented at this level.

    NOTE(review): presumably overridden by the concrete profile classes
    (this diff shows ``copy`` definitions returning RankProfile and
    ScoreProfile) -- confirm against the class hierarchy.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

@property
def ballots(self) -> tuple[Ballot, ...]:
    """
    Not implemented at this level; annotated as returning a tuple of Ballot.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
Expand Down Expand Up @@ -672,44 +677,7 @@ def __add__(self, other) -> RankProfile:
"""
Add two PreferenceProfiles by combining their ballot lists.
"""
if not isinstance(other, RankProfile):
raise TypeError("Unsupported operand type. Must be an instance of RankProfile.")

assert self.max_ranking_length is not None and other.max_ranking_length is not None
max_ranking_length = max([self.max_ranking_length, other.max_ranking_length])
candidates = list(set(self.candidates).union(other.candidates))

df_1 = self.df.copy()
df_2 = other.df.copy()

if self.max_ranking_length < max_ranking_length:
for i in range(self.max_ranking_length, max_ranking_length):
df_1.insert(
len(df_1.columns),
f"Ranking_{i + 1}",
pd.Series([frozenset("~")] * len(df_1), dtype=object, index=df_1.index),
)
if other.max_ranking_length < max_ranking_length:
for i in range(other.max_ranking_length, max_ranking_length):
df_2.insert(
len(df_2.columns),
f"Ranking_{i + 1}",
pd.Series([frozenset("~")] * len(df_2), dtype=object, index=df_2.index),
)

new_df = pd.concat([df_1, df_2], ignore_index=True)
new_df.index.name = "Ballot Index"
ranking_cols = [c for c in new_df.columns if "Ranking_" in c]
new_df[ranking_cols] = new_df[ranking_cols].astype("object")
new_df = new_df[
[f"Ranking_{i + 1}" for i in range(max_ranking_length)] + ["Weight", "Voter Set"]
]

return RankProfile(
candidates=candidates,
df=new_df,
max_ranking_length=max_ranking_length,
)
return _sum_rank_profiles([self, other])

def group_ballots(self) -> RankProfile:
"""
Expand Down Expand Up @@ -745,6 +713,19 @@ def group_ballots(self) -> RankProfile:
max_ranking_length=self.max_ranking_length,
)

def copy(self) -> RankProfile:
    """
    Build a new RankProfile from this profile's data.

    The candidates and ``max_ranking_length`` are passed through unchanged,
    while the ballot dataframe is duplicated via ``self.df.copy()`` so the new
    profile does not share this profile's dataframe object.

    Returns:
        RankProfile: New RankProfile object
    """
    same_candidates = self.candidates
    duplicated_df = self.df.copy()
    same_ranking_length = self.max_ranking_length
    return RankProfile(
        candidates=same_candidates,
        df=duplicated_df,
        max_ranking_length=same_ranking_length,
    )

def __eq__(self, other):
if not isinstance(other, RankProfile):
return False
Expand Down Expand Up @@ -1254,29 +1235,7 @@ def __add__(self, other):
"""
Add two PreferenceProfiles by combining their ballot lists.
"""
if not isinstance(other, ScoreProfile):
raise TypeError("Unsupported operand type. Must be an instance of ScoreProfile.")

df_1 = self.df.copy()
df_2 = other.df.copy()

cand1 = set(self.candidates)
cand2 = set(other.candidates)
for cand in cand2 - cand1:
df_1[cand] = [np.nan] * len(df_1)
for cand in cand1 - cand2:
df_2[cand] = [np.nan] * len(df_2)

new_df = pd.concat([df_1, df_2], ignore_index=True)
new_df.index.name = "Ballot Index"

new_candidates = sorted(set(self.candidates).union(other.candidates))
new_df = new_df[new_candidates + ["Weight", "Voter Set"]]

return ScoreProfile(
candidates=new_candidates,
df=new_df,
)
return _sum_score_profiles([self, other])

def group_ballots(self) -> ScoreProfile:
"""
Expand Down Expand Up @@ -1312,6 +1271,18 @@ def group_ballots(self) -> ScoreProfile:
candidates=self.candidates,
)

def copy(self) -> ScoreProfile:
    """
    Build a new ScoreProfile from this profile's data.

    The ballot dataframe is duplicated via ``self.df.copy()`` so the new
    profile does not share this profile's dataframe object; the candidates are
    passed through unchanged.

    Returns:
        ScoreProfile: New ScoreProfile object
    """
    duplicated_df = self.df.copy()
    same_candidates = self.candidates
    return ScoreProfile(df=duplicated_df, candidates=same_candidates)

def __eq__(self, other):
if not isinstance(other, ScoreProfile):
return False
Expand Down
143 changes: 142 additions & 1 deletion src/votekit/pref_profile/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def convert_rank_profile_to_score_profile_via_score_vector(
raise ValueError("Ballots must not contain ties.")

cand_to_score_list = {
c: [np.nan for _ in range(len(rank_profile.df))] for c in rank_profile.candidates
cand: [np.nan for _ in range(len(rank_profile.df))] for cand in rank_profile.candidates
}

for df_tuple in rank_profile.df[ranking_cols].itertuples():
Expand All @@ -427,3 +427,144 @@ def convert_rank_profile_to_score_profile_via_score_vector(
df=new_df,
candidates=rank_profile.candidates,
)


def _sum_rank_profiles(rank_profiles: Sequence[PreferenceProfile]) -> RankProfile:
"""
Helper function for sum_profiles that sums RankProfiles.

Args:
rank_profiles (Sequence[PreferenceProfile]): List of profiles to sum.

Raises:
TypeError: Each profile must be of RankProfile type
"""

from votekit.pref_profile.pref_profile import RankProfile

if len(rank_profiles) == 1 and isinstance(rank_profiles[0], RankProfile):
return rank_profiles[0].copy()

if not (all(isinstance(p, RankProfile) for p in rank_profiles)):
invalid_profiles = [
(i, type(p).__name__)
for i, p in enumerate(rank_profiles)
if not isinstance(p, RankProfile)
]
invalid_profiles_str = ", ".join(f"index {i} ({t})" for i, t in invalid_profiles)
raise TypeError(
"All profiles must be of the same type, RankProfile. "
f"non-RankProfiles found at: {invalid_profiles_str}"
)

candidates = list(set().union(*[set(profile.candidates) for profile in rank_profiles]))
max_ranking_length = max([profile.max_ranking_length for profile in rank_profiles])

total_dfs = []
for profile in rank_profiles:
assert profile.max_ranking_length is not None
curr_df = (
profile.df.copy() if profile.max_ranking_length < max_ranking_length else profile.df
)
for i in range(profile.max_ranking_length, max_ranking_length):
curr_df.insert(
len(curr_df.columns),
f"Ranking_{i + 1}",
pd.Series([frozenset("~")] * len(curr_df), dtype=object, index=curr_df.index),
)
total_dfs.append(curr_df)

new_df = pd.concat(total_dfs, ignore_index=True)
new_df.index.name = "Ballot Index"
ranking_cols = [col for col in new_df.columns if "Ranking_" in col]
new_df[ranking_cols] = new_df[ranking_cols].astype("object")
new_df = new_df[
[f"Ranking_{i + 1}" for i in range(max_ranking_length)] + ["Weight", "Voter Set"]
]

return RankProfile(
candidates=candidates,
df=new_df,
max_ranking_length=max_ranking_length,
)


def _sum_score_profiles(score_profiles: Sequence[PreferenceProfile]) -> ScoreProfile:
"""
Helper function for sum_profiles that sums ScoreProfiles.

Args:
score_profiles (Sequence[PreferenceProfile]): The profiles to sum.

Raises:
TypeError: Each profile must be of ScoreProfile type
"""

from votekit.pref_profile.pref_profile import ScoreProfile

if len(score_profiles) == 1 and isinstance(score_profiles[0], ScoreProfile):
return score_profiles[0].copy()

if not (all(isinstance(p, ScoreProfile) for p in score_profiles)):
invalid_profiles = [
(i, type(p).__name__)
for i, p in enumerate(score_profiles)
if not isinstance(p, ScoreProfile)
]
invalid_profiles_str = ", ".join(f"index {i} ({t})" for i, t in invalid_profiles)
raise TypeError(
"All profiles must be of the same type, ScoreProfile. "
f"non-ScoreProfiles found at: {invalid_profiles_str}"
)

total_cand = set().union(*[set(profile.candidates) for profile in score_profiles])
total_dfs = []
for profile in score_profiles:
curr_cand = set(profile.candidates)
curr_df = profile.df.copy() if curr_cand < total_cand else profile.df
for cand in total_cand - curr_cand:
curr_df[cand] = [np.nan] * len(curr_df)
total_dfs.append(curr_df)
Comment on lines +520 to +527

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could speed this up a little bit by not copying the dataframes that already contain a score for every candidate in total_cand. Avoids a secondary df copy.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did not copy the data frame if they already contain a score for each candidate or the same ranking length for summing ScoreProfiles and RankProfiles respectively.


new_df = pd.concat(total_dfs, ignore_index=True)
new_df.index.name = "Ballot Index"
new_candidates = sorted(total_cand)
new_df = new_df[new_candidates + ["Weight", "Voter Set"]]

return ScoreProfile(
candidates=new_candidates,
df=new_df,
)


def sum_profiles(profiles: Sequence[PreferenceProfile]) -> PreferenceProfile:
    """
    Combines multiple PreferenceProfiles by combining their ballot lists.

    The type of the first profile decides which summing helper handles the
    whole sequence: ``_sum_rank_profiles`` for a RankProfile and
    ``_sum_score_profiles`` for a ScoreProfile.

    Args:
        profiles (Sequence[PreferenceProfile]): The profiles to sum.

    Returns:
        PreferenceProfile: A new PreferenceProfile object containing the combined profiles.

    Raises:
        ValueError: Cannot sum an empty list of profiles.
        TypeError: Can only sum profiles of type RankProfile or ScoreProfile.
    """

    from votekit.pref_profile.pref_profile import RankProfile, ScoreProfile

    if len(profiles) == 0:
        raise ValueError("Cannot sum an empty list of profiles.")

    first_profile = profiles[0]

    if isinstance(first_profile, RankProfile):
        return _sum_rank_profiles(profiles)

    if isinstance(first_profile, ScoreProfile):
        return _sum_score_profiles(profiles)

    # Neither supported profile type matched the first element.
    raise TypeError(
        f"Cannot sum profiles of type {type(first_profile).__name__}. "
        "List can only contain RankProfiles or ScoreProfiles."
    )
Loading
Loading