import numpy as np
import pandas as pd
import jpype
import jpype.imports
from jpype.types import *
from jpype import java
from com.bayesserver.data import *
def _to_java_class(data_type):
    """
    Converts numpy data type to equivalent Java class
    :param data_type: the numpy data type (e.g. an entry of ``DataFrame.dtypes``)
    :return: The Java Class
    :raises ValueError: if the data type has no mapping below
    """
    # Each Java class is looked up lazily inside its branch, so an unsupported
    # dtype reaches the ValueError without touching the JVM.
    if data_type == np.int32:
        return java.lang.Integer(0).getClass()
    if data_type == np.int64:
        return java.lang.Long(0).getClass()
    if data_type == np.float32:
        return java.lang.Float(0).getClass()
    if data_type == np.float64:
        return java.lang.Double(0.0).getClass()
    # np.bool_ / np.object_ are the numpy scalar types. The bare aliases
    # np.bool / np.object were removed in NumPy 1.24 and raised AttributeError.
    if data_type == np.bool_:
        return java.lang.Boolean(False).getClass()
    if data_type == np.object_:
        # object columns are mapped to java.lang.String
        return java.lang.String().getClass()
    raise ValueError('dtype [{}] not currently supported'.format(data_type))
def to_data_table(df):
    """
    Converts a pandas DataFrame to a DataTable
    :param df: the pandas DataFrame to convert
    :return: a DataTable with one DataColumn per DataFrame column and one row
        per DataFrame row
    :raises ValueError: if a column dtype is not supported by _to_java_class
    """
    data_table = DataTable()

    # Declare one column per DataFrame column, typed by its dtype.
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
    cols = data_table.getColumns()
    for name, data_type in df.dtypes.items():
        java_class = _to_java_class(data_type)
        cols.add(DataColumn(name, java_class))

    # Fetch the row collection once rather than on every iteration.
    rows = data_table.getRows()

    # NOTE: iterrows() yields each row as a Series, so values in a frame with
    # mixed dtypes may be upcast to a common dtype before they are added.
    for _, row in df.iterrows():
        # Missing values (NaN / None / NaT) are passed to Java as None.
        values = [None if pd.isnull(x) else x for x in row]
        rows.add(values)

    return data_table