r'''Methods for generating the Bhattacharyya
coefficient between two decks of cards.
First you read in a couple decks from a decklist like so:
>>> da = "1 Card\n1 Card\n5 Card B\n6 card C"
>>> db = "1 Card\n2 Card\n3 Card B\n6 card D"
Then convert them into deck dicts.
>>> da = read_deck(da)
>>> db = read_deck(db)
Then convert the int values to percents.
We do this because the conversion to percent is likely to lose precision.
>>> da = deck_to_percentages(da)
>>> db = deck_to_percentages(db)
And finally get the Bhattacharyya coefficient
>>> bhat_distance(da, db)
50.620297161120519
Or if we want it from 0 to 1:
>>> bhat_distance(da, db)/100
0.5062029716112052
Which means that, measured over the cards in our two decks,
the decks overlap by roughly half.
'''
# pylint: disable-msg=W0312
from math import sqrt
def bhat_distance(lista, listb):
    '''
    Returns the Bhattacharyya coefficient of two distributions as a float.

    Takes two dicts, the key being the name of the element,
    the value being the percentage amount of that element present.
    When the values of each dict add up to 100, the result runs from
    0.0 (no element shared) to 100.0 (identical distributions).
    Despite the function's name, a LARGER result means the two dicts
    are MORE alike.

    Elements present in only one of the dicts contribute nothing.
    Raises ValueError (from sqrt) if a shared element has a negative value.

    Sqrt and multiply each shared element, for example:

    >>> round(sqrt(3)*sqrt(5), 4)
    3.873

    Then add each of those that are both present.

    >>> round((sqrt(3)*sqrt(5))+(sqrt(1)*sqrt(3)), 4)
    5.605
    >>> lista = {"A":3, "B":5, "C":1, "D":91}
    >>> listb = {"A":5, "X":5, "C":3, "E":87}
    >>> round(bhat_distance(lista, listb), 4)
    5.605

    Half of each deck coincides with the other.

    >>> lista = {"A": 50, "B": 30, "C": 20}
    >>> listb = {"A": 50, "X": 50}
    >>> round(bhat_distance(lista, listb), 6)
    50.0

    No overlap, no cards shared, method returns 0.0.

    >>> lista = {"card a":100}
    >>> listb = {"card b":100}
    >>> bhat_distance(lista, listb)
    0.0

    Same cards in slightly different proportions score just under 100.

    >>> lista = {"card a":5, "asdf":95}
    >>> listb = {"card a":10, "asdf":90}
    >>> round(bhat_distance(lista, listb), 4)
    99.5373

    Identical distributions score 100.

    >>> lista = {"Generic Card": 4, "Other Cards":96}
    >>> listb = {"Generic Card": 4, "Other Cards":96}
    >>> round(bhat_distance(lista, listb), 6)
    100.0
    >>> lista = {"Generic Card": 100}
    >>> round(bhat_distance(lista, lista), 6)
    100.0
    '''
    # Start from 0.0 so the result is a float even when nothing is shared.
    return sum(
        (sqrt(lista[elem]) * sqrt(listb[elem])
         for elem in lista if elem in listb),
        0.0
    )
def read_deck(decklist):
    r'''Reads a given string as if it were a deck in GCCG format.

    Blank lines and lines starting with # are ignored.
    Each card line starts with a number, followed by whitespace
    (spaces or tabs), followed by the name of the card.
    Returns a dict of all cards, with values equal to the number of
    each of that card present.
    Raises ValueError if a card line has no card name or if its count
    is not an integer.

    >>> read_deck("1 Card")
    {'Card': 1}

    Duplicates are merged:

    >>> read_deck("1 Card\n2 Card")
    {'Card': 3}

    And whitespace is nuked as well:

    >>> deck = read_deck("1 Card\n2 Card\n5 Card B\n 6 card C")
    >>> deck == {'Card B': 5, 'Card': 3, 'card C': 6}
    True

    A tab between the count and the name works too:

    >>> read_deck("4\tCard")
    {'Card': 4}
    '''
    deck = {}
    for line in decklist.split("\n"):
        # Clean any leading or trailing whitespace (including a stray "\r").
        line = line.strip()
        if not line or line.startswith("#"):
            # Skip comments and blank lines.
            continue
        # sep=None splits on any run of whitespace, so tabs and repeated
        # spaces between the count and the name are accepted.
        fields = line.split(None, 1)
        if len(fields) != 2:
            raise ValueError(
                "Malformed deck line (expected '<count> <card name>'): %r"
                % line)
        count, cardname = fields
        # int() raises ValueError itself when the count is not a number.
        deck[cardname] = deck.get(cardname, 0) + int(count)
    return deck
def deck_to_percentages(deck):
    '''Turns a deck dict of card counts (as produced by read_deck) into
    a dict mapping each card name to its share of the deck, expressed
    as a percentage of the total card count.

    >>> deck = {'Card B': 5, 'Card': 3, 'card C': 6}
    >>> deck_to_percentages(deck)
    {'Card B': 35.714285714285715, 'Card': 21.428571428571427, 'card C': 42.857142857142854}
    '''
    # Accumulate in a float so the division below is never integer division.
    total = 0.0
    for quantity in deck.values():
        total += quantity
    return {
        card: (quantity / total) * 100
        for card, quantity in deck.items()
    }
def main():
    '''Default command-line behavior:
    Compare two given decklists in gccg format and print a string
    with each filename and the similarity between the two.

    The two decklist paths are read from sys.argv[1] and sys.argv[2];
    any further arguments are ignored. When fewer than two paths are
    given, a usage message is printed instead and nothing is compared.
    '''
    import sys
    #TODO: Iterate over all given arguments and compare them all
    #TODO: If an arg is a directory, recurse into it.
    # sys.argv[0] is the script itself, so two decklists need 3 entries.
    if len(sys.argv) < 3:
        print("Usage: python deck_comparator.py decklist1.deck decklist2.deck")
        print("")
        return
    deck1 = sys.argv[1]
    deck2 = sys.argv[2]
    with open(deck1) as fpointer:
        decka = deck_to_percentages(read_deck(fpointer.read()))
    with open(deck2) as fpointer:
        deckb = deck_to_percentages(read_deck(fpointer.read()))
    bhat_coeff = bhat_distance(decka, deckb)
    # %d truncates the coefficient to a whole percentage.
    print("%s vs %s: %d%% similarity." % (deck1, deck2, bhat_coeff))
if __name__ == "__main__":
    # Run the module's doctests as a self-check, then do the
    # command-line comparison.
    from doctest import testmod
    testmod()
    main()