In this assignment, your task is to implement and analyze algorithms for inference in probabilistic models described by factor graphs. In particular, you will implement the Sum-Product algorithm (also known as Belief Propagation) and the Max-Product (or Max-Sum) algorithm to infer probabilities, (marginal) distributions and values of variables for general factor graphs with categorical random variables, as described in the slides on Graphical Models and Inference. More detailed information can be found in Chapter 8.4 of Christopher M. Bishop. 2006. Pattern Recognition and Machine Learning.
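As a reminder, the Sum-Product algorithm (Bishop, Chapter 8.4) passes two kinds of messages along the edges of the factor graph: factor-to-variable messages and variable-to-factor messages
$$ \mu_{f\rightarrow x}(x) = \sum_{x_1}\dots\sum_{x_M} f(x, x_1, \dots, x_M) \prod_{m\in ne(f)\setminus x} \mu_{x_m\rightarrow f}(x_m) $$
$$ \mu_{x\rightarrow f}(x) = \prod_{l\in ne(x)\setminus f} \mu_{f_l\rightarrow x}(x) $$
where $ne(\cdot)$ denotes the neighbours of a node in the graph. Once all messages are available, the (unnormalized) marginal of a variable is the product of all messages arriving at it: $p(x) \propto \prod_{s\in ne(x)} \mu_{f_s\rightarrow x}(x)$.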
The preferred and easiest way of accomplishing this task is to complete this Jupyter Notebook, which already comes with a definition of the probabilistic model that you will work with. If you do not have any experience with Jupyter Notebooks, the easiest way to start is to install Anaconda3, run Jupyter Notebook and open this notebook downloaded from [BAYa_Assignment2020.ipynb](http://www.fit.vutbr.cz/study/courses/BAYa/public/notebooks/BAYa_Assignment2020.ipynb).
The following cell contains the code defining the probabilistic model for which you will run the inference algorithms. Do not edit this part!
import numpy as np
#Names of random variables in our model
var_names=['State', 'Age', 'COVID', 'Party', 'Favorite', 'Voting']
#Names of possible values (categories) for each random variable.
val_names=[['Texas', 'California', 'NewYork'], #State
['Young', 'Old'], #Age
['Negative', 'Positive'], #COVID
['Democrats', 'Republicans'], #Party
['Biden', 'Trump'], #Favorite
['InPerson', 'Postal', 'NotVoting']]#Voting
#The above variables with names are introduced just to make up a story around our model. Use these names
# to interpret your results, but, in your 'generic' inference algorithms, avoid the use of these variables
# and use only the definitions below!
#Number of categories for each random variable.
ncategories = np.array([len(v) for v in val_names])
class Factor:
    """
    Instances of this class represent individual factors in a factor graph.
    It is merely a structure of two member variables:
    'vars': list of N integers, which are IDs of the N random variables that this
        factor depends on. These integers can be seen as indices into 'var_names'.
    'table': potential function of the factor. Since we deal only with categorical
        variables, the potential function has the form of an N-dimensional array.
        The first dimension corresponds to the first variable in 'vars', the second
        dimension to the second variable, etc. The size of each dimension is given
        by the number of possible values of the corresponding variable.
    """
    def __init__(self, list_of_variable, potential_function_table):
        self.vars = list(list_of_variable)
        self.table = np.array(potential_function_table)
        # the number of table dimensions and the number of variables must match
        assert(self.table.ndim == len(self.vars))
        # the individual dimensions must match the number of categories of the corresponding variable
        assert(np.all(ncategories[self.vars] == self.table.shape))
"List of factors defining our complete probabilistic model"
factors = [
# P(State)
Factor([0], [0.3, # Texas
0.5, # California
0.2]), # NewYork
# P(Age)
Factor([1], [0.6, # Young
0.4]), # Old
# P(COVID)
Factor([2], [0.7, # Negative
0.3]), # Positive
# Texas California NewYork
# P(Party|State,Age) Young,Old Young,Old Young.Old
Factor([3, 0, 1], [[[0.4, 0.2], [0.9, 0.8], [0.8, 0.6]], # Democrats
[[0.6, 0.8], [0.1, 0.2], [0.2, 0.4]]]),# Republican
# P(Favorite|Party) Dem. Rep.
Factor([4, 3], [[0.95, 0.2], # Biden
[0.05, 0.8]]),# Trump
# Democrats Republicans
# P(Voting|Party,COVID) Neg. Pos. Neg. Pos.
Factor([5, 3, 2], [[[0.5, 0.0], [0.7, 0.1]], # InPerson
[[0.4, 0.9], [0.1, 0.4]], # Postal
[[0.1, 0.1], [0.2, 0.5]]])# None
]
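Since every factor in this model represents a (conditional) probability distribution of its first variable, a quick sanity check (a small sketch, not part of the required assignment code) is to verify that each table sums to one over its first dimension:
for f in factors:
    # summing a conditional distribution over its own variable (axis 0)
    # must give 1 for every combination of the conditioning variables
    assert np.allclose(f.table.sum(axis=0), 1.0)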
In the above cell, we first introduce names of our random variables and their values to make up some "story" around our probabilistic model. Our model "pretends" to model the behaviour of a random US citizen during the recent US presidential elections. It contains the following random variables and their categorical values:
- State: Texas, California, NewYork
- Age: Young, Old
- COVID: Negative, Positive
- Party: Democrats, Republicans
- Favorite: Biden, Trump
- Voting: InPerson, Postal, NotVoting
Next comes the definition of the full model as a list of factors. In our model, the factors are taken from a Bayesian network (see below). Therefore, each factor represents a (normalized) probability distribution of one of the 6 variables. This variable is always the first element of 'vars' (see 'vars' in class Factor). If 'vars' has more than one element, then the factor represents a conditional distribution that is conditioned on the remaining variables.
The following function 'evaluate_factor' can help us interpret the individual factors. Its arguments are a Factor instance and values for all the variables that the factor depends on (in the order given by 'vars'). The values are given only as integer IDs of the categories (i.e. second indices into 'val_names'). The function evaluates the factor using these values. Finally, using the variable and value names, it prints out a string showing what (conditional) distribution is evaluated using what values and what the resulting probability is.
def evaluate_factor(factor, *values):
    var_vals = ['='.join((var_names[var], val_names[var][val])) for var, val in zip(factor.vars, values)]
    print("P(" + var_vals[0] + ('' if len(var_vals) < 2 else '|') + (','.join(var_vals[1:])) + ") =", factor.table[values])
evaluate_factor(factors[5], 1, 0, 1)
evaluate_factor(factors[5], 2, 1, 1)
evaluate_factor(factors[4], 0, 1)
evaluate_factor(factors[0], 1)
P(Voting=Postal|Party=Democrats,COVID=Positive) = 0.9
P(Voting=NotVoting|Party=Republicans,COVID=Positive) = 0.5
P(Favorite=Biden|Party=Republicans) = 0.2
P(State=California) = 0.5
From the examples of evaluated factors above, we can see, for example, that a COVID-positive Democrat votes by post with probability 0.9, while a COVID-positive Republican does not vote at all with probability 0.5.
Using all the factors, the following code constructs a graph showing the corresponding Bayesian Network. Each node is one variable. For each factor, we create edges into the first variable in 'vars' from all the remaining variables.
from graphviz import Digraph
dot = Digraph()
dot.edges([(var_names[v], var_names[f.vars[0]]) for f in factors for v in f.vars[1:]])
dot
As we can see, our model naively assumes that the probability of being COVID positive depends neither on the age nor on the state the person is from.
Similarly, we can draw the Factor Graph corresponding to our model:
from graphviz import Graph
fg = Graph()
for f in factors:
    fg.node(var_names[f.vars[0]] + "_", var_names[f.vars[0]], shape="box")
fg.edges([(var_names[f.vars[0]] + "_", var_names[v]) for f in factors for v in f.vars])
fg
As was already said, each factor corresponds to the (conditional) distribution of one variable. Therefore, we name the factor (rectangular) nodes after these variables. This Factor Graph has a tree structure, so the application of Belief Propagation should be straightforward.
The following cell implements the inefficient 'brute-force marginalization' approach, which might be useful for testing the correctness of your Belief Propagation implementation. When calling this function, we can specify values of some of the variables, and the function marginalizes over all possible values of the remaining variables. It implements the operation $$ \DeclareMathOperator{\xx}{\mathbf{x}} \sum_{\xx_{marg}}\prod_s f_s(\xx_s) $$ where the product is over all factors $f_s$, $\xx_s$ is the subset of variables that $f_s$ depends on and $\xx_{marg}$ is the subset of variables for which the sum runs over all their possible values (i.e. marginalizes). For our model, where the factors are the (conditional) distributions of individual variables, the product corresponds to the joint distribution of all the variables $$ \prod_s f_s(\xx_s) = P(State) P(Age) P(COVID) P(Party|State,Age) P(Favorite|Party) P(Voting|Party,COVID)\\ = P(State,Age,COVID,Party,Favorite,Voting) $$
For example, if we supply the function with values for $Party$ and $Voting$ and marginalize over all other variables, we obtain the joint marginal probability
$$ P(Party,Voting)=\sum_{State} \sum_{Age}\sum_{COVID}\sum_{Favorite}P(State,Age,COVID,Party,Favorite,Voting) $$
import itertools
def brute_force_marginalize(value_list):
    """
    value_list is a list of values, one for each variable. For values set to None,
    we marginalize over all possible values of the corresponding variable. For other
    values, we use the given value for the corresponding variable when evaluating factors.
    """
    value_ranges = [range(n) if v is None else (v,) for n, v in zip(ncategories, value_list)]
    marginal_prob = 0.0
    # itertools.product lets us iterate over all possible values of all variables
    for values in itertools.product(*value_ranges):
        joint_prob = 1.0
        for f in factors:
            joint_prob *= f.table[tuple(values[v] for v in f.vars)]
        marginal_prob += joint_prob
    return marginal_prob
print("Z = ", brute_force_marginalize([None, None, None, None, None, None]))
Z = 0.9999999999999999
When calling 'brute_force_marginalize' in the cell above, we set the values of all variables to 'None'. This instructs the function to sum over all values of all variables. The result is (up to numerical precision) 1, which confirms that our model represents a well-normalized joint distribution. In the following calls, we clamp some of the variables to specific values, which avoids marginalization over those variables.
print("P(Voting=InPerson) =", brute_force_marginalize([None, None, None, None, None, 0]))
print("P(Voting=Postal) =", brute_force_marginalize([None, None, None, None, None, 1]))
print("P(Voting=NotVoting) =", brute_force_marginalize([None, None, None, None, None, 2]))
print("P(Party=Democrats) =", brute_force_marginalize([None, None, None, 0, None, None]))
print("P(Party=Republicans)=", brute_force_marginalize([None, None, None, 1, None, None]))
print("P(Party=Republicans,Voting=InPerson)=", brute_force_marginalize([None, None, None, 1, None, 0]))
P(Voting=InPerson) = 0.4061000000000001
P(Voting=Postal) = 0.43120000000000014
P(Voting=NotVoting) = 0.1627
P(Party=Democrats) = 0.6699999999999997
P(Party=Republicans)= 0.33000000000000024
P(Party=Republicans,Voting=InPerson)= 0.17159999999999995
The above examples show that the marginal probabilities $P(Voting)$ and $P(Party)$ sum to 1 when summed over all their possible values. The marginal distribution $P(Voting)$ tells us the probabilities of the different forms of voting (InPerson, Postal, NotVoting), and $P(Party=Democrats)$ says that about $2/3$ of all the people are Democrats.
We also evaluate the joint marginal $P(Party,Voting)$ to see that, out of all the people, 17.16% are Republicans who vote in person.
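Conditional probabilities can also be obtained from such marginals via the product rule $P(A|B) = P(A,B)/P(B)$; a small sketch (not part of the required assignment interface):
# P(Voting=InPerson|Party=Republicans) = P(Party=Rep,Voting=InPerson) / P(Party=Rep)
p_joint = brute_force_marginalize([None, None, None, 1, None, 0])
p_party = brute_force_marginalize([None, None, None, 1, None, None])
print("P(Voting=InPerson|Party=Republicans) =", p_joint / p_party)  # ~0.52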
Your task is to implement the Sum-Product algorithm, allowing for much more efficient inference than the brute-force approach. You will use the algorithm to infer individual probabilities such as $$P(Age=Young,Favorite=Biden)=?$$ $$P(Voting=Postal|Favorite=Trump,COVID=Positive)=?$$ or whole marginal distributions such as $$P(Voting|Favorite=Trump,COVID=Positive)=?$$ $$P(Age,Favorite)=?$$ For the distributions, you need to report probabilities for all possible values of the variables in question, e.g. $$P(Age=Young,Favorite=Biden)=?$$ $$P(Age=Young,Favorite=Trump)=?$$ $$P(Age=Old,Favorite=Biden)=?$$ $$P(Age=Old,Favorite=Trump)=?$$
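As a possible starting point, the following sketch shows one way of computing a single factor-to-variable message with numpy. The function name and the 'incoming' dictionary (mapping each neighbouring variable ID to its variable-to-factor message) are our own illustrative choices, not a required interface:
def factor_to_variable_message(factor, target_var, incoming):
    msg = factor.table
    # multiply in the messages from all neighbouring variables except the target;
    # reshaping lets numpy broadcast each message along the correct table dimension
    for axis, var in enumerate(factor.vars):
        if var != target_var:
            shape = [1] * msg.ndim
            shape[axis] = len(incoming[var])
            msg = msg * incoming[var].reshape(shape)
    # sum out every dimension except the one corresponding to target_var
    target_axis = factor.vars.index(target_var)
    return msg.sum(axis=tuple(a for a in range(msg.ndim) if a != target_axis))

# e.g. the message from the P(Favorite|Party) factor to 'Favorite',
# given a (hypothetical) all-ones message from 'Party':
print(factor_to_variable_message(factors[4], 4, {3: np.ones(2)}))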
Use your implementation to infer the probabilities and distributions listed above and to answer the following question: what happens if we replace the factor
Factor([2], [0.7, 0.3])
in our model with the factor
Factor([2, 0], [[0.9, 0.8, 0.5], [0.1, 0.2, 0.5]])
? What does this new factor represent? How does the corresponding Bayesian Network change? How does it complicate the inference? It is enough if you just answer these questions. You can also try to implement inference for the marginals of the individual variables for this modified model. A successful implementation will be awarded a bonus point.
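For illustration, the modified model can be constructed and visualized with the same code as above (a sketch; 'modified_factors' and 'fg2' are our own illustrative names):
# build the modified model by swapping the P(COVID) factor for the new one
modified_factors = list(factors)
modified_factors[2] = Factor([2, 0], [[0.9, 0.8, 0.5], [0.1, 0.2, 0.5]])
# redraw the factor graph to see how its structure changes
fg2 = Graph()
for f in modified_factors:
    fg2.node(var_names[f.vars[0]] + "_", var_names[f.vars[0]], shape="box")
fg2.edges([(var_names[f.vars[0]] + "_", var_names[v]) for f in modified_factors for v in f.vars])
fg2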