public abstract class DataHandle
extends java.lang.Object
| Modifier and Type | Field and Description |
|---|---|
protected DataType<?>[] |
columnToDataType
The data types.
|
protected DataDefinition |
definition
The data definition.
|
protected java.lang.String[] |
header
The header.
|
protected com.carrotsearch.hppc.ObjectIntOpenHashMap<java.lang.String> |
headerMap
The header map, keyed by the header strings.
|
protected ARXLattice.ARXNode |
node
The node.
|
protected org.deidentifier.arx.DataRegistry |
registry
The current registry.
|
protected DataHandle |
subset
The current research subset.
|
| Constructor and Description |
|---|
DataHandle() |
| Modifier and Type | Method and Description |
|---|---|
protected void |
checkColumn(int column1)
Checks a column index.
|
protected void |
checkColumns(int[] columns)
Checks the column indexes.
|
protected void |
checkReleased()
Checks whether a registry is referenced.
|
protected void |
checkRow(int row1,
int length)
Checks a row index.
|
protected abstract void |
doRelease()
Releases all resources.
|
abstract java.lang.String |
getAttributeName(int col)
Returns the name of the specified column.
|
protected DataType<?> |
getBaseDataType(java.lang.String attribute)
Returns the base data type without generalization.
|
int |
getColumnIndexOf(java.lang.String attribute)
Returns the index of the given attribute, -1 if it is not in the header.
|
protected abstract DataType<?>[] |
getColumnToDataType()
Generates an array of data types.
|
protected abstract ARXConfiguration |
getConfiguration()
Returns the ARXConfiguration that is currently being used, or null if this is an input handle.
|
DataType<?> |
getDataType(java.lang.String attribute)
Returns the data type of the given attribute.
|
java.util.Date |
getDate(int row,
int col)
Returns a date/time value from the specified cell.
|
DataDefinition |
getDefinition()
Returns the data definition.
|
java.lang.String[] |
getDistinctValues(int column)
Returns an array containing the distinct values in the given column.
|
protected abstract java.lang.String[] |
getDistinctValues(int column,
boolean ignoreSuppression,
DataHandleInternal.InterruptHandler handler)
Returns the distinct values.
|
java.lang.Double |
getDouble(int row,
int col)
Returns a double value from the specified cell.
|
java.lang.Float |
getFloat(int row,
int col)
Returns a float value from the specified cell.
|
abstract int |
getGeneralization(java.lang.String attribute)
Returns the generalization level for the attribute.
|
java.lang.Integer |
getInt(int row,
int col)
Returns an int value from the specified cell.
|
java.lang.Long |
getLong(int row,
int col)
Returns a long value from the specified cell.
|
java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column)
Returns a mapping from data types to the relative number of values that conform to the according type.
|
<U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
java.lang.Class<U> clazz)
Returns a mapping from data types to the relative number of values that conform to the according type for a given wrapped class.
|
<U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
java.lang.Class<U> clazz,
double threshold)
Returns a mapping from data types to the relative number of values that conform to the according type for a given wrapped class.
|
<U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
java.lang.Class<U> clazz,
java.util.Locale locale)
Returns a mapping from data types to the relative number of values that conform to the according type for a given wrapped class.
|
<U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
java.lang.Class<U> clazz,
java.util.Locale locale,
double threshold)
Returns a mapping from data types to the relative number of values that conform to the according type for a given wrapped class.
|
java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
double threshold)
Returns a mapping from data types to the relative number of values that conform to the according type.
|
java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
java.util.Locale locale)
Returns a mapping from data types to the relative number of values that conform to the corresponding type.
This method only returns types that match at least 80% of all values in the column.
|
java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> |
getMatchingDataTypes(int column,
java.util.Locale locale,
double threshold)
Returns a mapping from data types to the relative number of values that conform to the according type.
|
java.lang.String[] |
getNonConformingValues(int column,
DataType<?> type,
int max)
Returns a set of values that do not conform to the given data type.
|
abstract int |
getNumColumns()
Returns the number of columns in the dataset.
|
int |
getNumConformingValues(int column,
DataType<?> type)
Returns the number of (distinct) values that conform to the given data type.
|
abstract int |
getNumRows()
Returns the number of rows in the dataset.
|
protected org.deidentifier.arx.DataRegistry |
getRegistry()
Returns the registry associated with this handle.
|
RiskEstimateBuilder |
getRiskEstimator()
Returns a risk estimator, using the US population if required.
|
RiskEstimateBuilder |
getRiskEstimator(ARXPopulationModel model)
Returns a risk estimator
|
RiskEstimateBuilder |
getRiskEstimator(ARXPopulationModel model,
ARXSolverConfiguration config)
Returns a risk estimator
|
RiskEstimateBuilder |
getRiskEstimator(ARXPopulationModel model,
RiskModelHistogram classes)
Returns a risk estimator for the given set of equivalence classes.
|
RiskEstimateBuilder |
getRiskEstimator(ARXPopulationModel model,
RiskModelHistogram classes,
ARXSolverConfiguration config)
Returns a risk estimator for the given set of equivalence classes.
|
RiskEstimateBuilder |
getRiskEstimator(ARXPopulationModel model,
java.util.Set<java.lang.String> qis)
Returns a risk estimator for the given set of quasi-identifiers
|
RiskEstimateBuilder |
getRiskEstimator(ARXPopulationModel model,
java.util.Set<java.lang.String> qis,
ARXSolverConfiguration config)
Returns a risk estimator for the given set of quasi-identifiers
|
abstract StatisticsBuilder |
getStatistics()
Returns an object providing access to basic descriptive statistics about the data represented
by this handle.
|
ARXLattice.ARXNode |
getTransformation()
Returns the transformation.
|
abstract java.lang.String |
getValue(int row,
int col)
Returns the value in the specified cell.
|
protected abstract int |
getValueIdentifier(int column,
java.lang.String value)
Returns the internal value identifier
|
DataHandle |
getView()
Returns a new data handle that represents a context specific view on the dataset.
|
protected int |
internalCompare(int row1,
int row2,
int[] columns,
boolean ascending)
Returns a negative integer, zero, or a positive integer as the first argument is
less than, equal to, or greater than the second.
|
protected abstract int |
internalGetEncodedValue(int row,
int col,
boolean ignoreSuppression)
Internal representation of get encoded value.
|
protected abstract java.lang.String |
internalGetValue(int row,
int col,
boolean ignoreSuppression)
Internal representation of get value.
|
protected abstract boolean |
internalIsOutlier(int row,
int[] columns)
Returns whether this is an outlier regarding the given columns.
|
protected abstract boolean |
internalReplace(int column,
java.lang.String original,
java.lang.String replacement)
Internal replacement method.
|
protected boolean |
isAnonymous()
Returns whether the data represented by this handle is anonymous
|
boolean |
isOptimized()
Has this handle been optimized with local recoding?
|
boolean |
isOutlier(int row)
Determines whether a given row is an outlier in the currently associated
data transformation.
|
boolean |
isReleased()
Determines whether this handle is orphaned, i.e., should not be used anymore.
|
boolean |
isSuppressed(int row)
Determines whether a given row is completely suppressed
|
abstract java.util.Iterator<java.lang.String[]> |
iterator()
Returns an iterator over the data.
|
void |
release()
Releases this handle and all associated resources.
|
ElementData |
render()
Renders this object
|
boolean |
replace(int column,
java.lang.String original,
java.lang.String replacement)
Replaces the original value with the replacement in the given column.
|
void |
save(java.io.File file)
Writes the data to a CSV file.
|
void |
save(java.io.File file,
char separator)
Writes the data to a CSV file.
|
void |
save(java.io.File file,
CSVSyntax config)
Writes the data to a CSV file.
|
void |
save(java.io.OutputStream out)
Writes the data to a CSV file.
|
void |
save(java.io.OutputStream out,
char separator)
Writes the data to a CSV file.
|
void |
save(java.io.OutputStream out,
CSVSyntax config)
Writes the data to a CSV file.
|
void |
save(java.lang.String path)
Writes the data to a CSV file.
|
void |
save(java.lang.String path,
char separator)
Writes the data to a CSV file.
|
void |
save(java.lang.String path,
CSVSyntax config)
Writes the data to a CSV file.
|
protected void |
setHeader(java.lang.String[] header)
Sets the current header
|
protected void |
setRegistry(org.deidentifier.arx.DataRegistry registry)
Updates the registry.
|
protected void |
setView(DataHandle handle)
Sets the subset.
|
void |
sort(boolean ascending,
int... columns)
Sorts the dataset according to the given columns.
|
void |
sort(int from,
int to,
boolean ascending,
int... columns)
Sorts the dataset according to the given columns and the given range.
|
void |
sort(cern.colt.Swapper swapper,
boolean ascending,
int... columns)
Sorts the dataset according to the given columns.
|
void |
sort(cern.colt.Swapper swapper,
int from,
int to,
boolean ascending,
int... columns)
Sorts the dataset according to the given columns and the given range.
|
void |
swap(int row1,
int row2)
Swaps both rows.
|
protected DataType<?>[] columnToDataType
protected DataDefinition definition
protected java.lang.String[] header
protected com.carrotsearch.hppc.ObjectIntOpenHashMap<java.lang.String> headerMap
protected ARXLattice.ARXNode node
protected org.deidentifier.arx.DataRegistry registry
protected DataHandle subset
public abstract java.lang.String getAttributeName(int col)
col - The column indexpublic int getColumnIndexOf(java.lang.String attribute)
attribute - the attributepublic DataType<?> getDataType(java.lang.String attribute)
attribute - the attributepublic java.util.Date getDate(int row,
int col)
throws java.text.ParseException
row - The cell's row indexcol - The cell's column indexjava.text.ParseException - the parse exceptionpublic DataDefinition getDefinition()
public final java.lang.String[] getDistinctValues(int column)
column - The column to processpublic java.lang.Double getDouble(int row,
int col)
throws java.text.ParseException
row - The cell's row indexcol - The cell's column indexjava.text.ParseException - the parse exceptionpublic java.lang.Float getFloat(int row,
int col)
throws java.text.ParseException
row - The cell's row indexcol - The cell's column indexjava.text.ParseException - the parse exceptionpublic abstract int getGeneralization(java.lang.String attribute)
attribute - the attributepublic java.lang.Integer getInt(int row,
int col)
throws java.text.ParseException
row - The cell's row indexcol - The cell's column indexjava.text.ParseException - the parse exceptionpublic java.lang.Long getLong(int row,
int col)
throws java.text.ParseException
row - The cell's row indexcol - The cell's column indexjava.text.ParseException - the parse exceptionpublic java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column)
column - the columnpublic <U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, java.lang.Class<U> clazz)
U - the generic typecolumn - the columnclazz - The wrapped classpublic <U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, java.lang.Class<U> clazz, double threshold)
U - the generic typecolumn - the columnclazz - The wrapped classthreshold - Relative minimal number of values that must match to include a data type in the resultspublic <U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, java.lang.Class<U> clazz, java.util.Locale locale)
U - the generic typecolumn - the columnclazz - The wrapped classlocale - The locale to usepublic <U> java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, java.lang.Class<U> clazz, java.util.Locale locale, double threshold)
U - the generic typecolumn - the columnclazz - The wrapped classlocale - The locale to usethreshold - Relative minimal number of values that must match to include a data type in the resultspublic java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, double threshold)
column - the columnthreshold - Relative minimal number of values that must match to include a data type in the resultspublic java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, java.util.Locale locale)
column - the columnlocale - The locale to usepublic java.util.List<org.apache.commons.math3.util.Pair<DataType<?>,java.lang.Double>> getMatchingDataTypes(int column, java.util.Locale locale, double threshold)
column - the columnlocale - The locale to usethreshold - Relative minimal number of values that must match to include a data type in the resultspublic java.lang.String[] getNonConformingValues(int column,
DataType<?> type,
int max)
column - The column to testtype - The type to testmax - The maximal number of values returned by this methodpublic abstract int getNumColumns()
public int getNumConformingValues(int column,
DataType<?> type)
column - The column to testtype - The type to testpublic abstract int getNumRows()
public RiskEstimateBuilder getRiskEstimator()
public RiskEstimateBuilder getRiskEstimator(ARXPopulationModel model)
model - public RiskEstimateBuilder getRiskEstimator(ARXPopulationModel model, ARXSolverConfiguration config)
model - config - public RiskEstimateBuilder getRiskEstimator(ARXPopulationModel model, RiskModelHistogram classes)
model - classes - public RiskEstimateBuilder getRiskEstimator(ARXPopulationModel model, RiskModelHistogram classes, ARXSolverConfiguration config)
model - classes - config - public RiskEstimateBuilder getRiskEstimator(ARXPopulationModel model, java.util.Set<java.lang.String> qis)
model - qis - public RiskEstimateBuilder getRiskEstimator(ARXPopulationModel model, java.util.Set<java.lang.String> qis, ARXSolverConfiguration config)
model - qis - config - public abstract StatisticsBuilder getStatistics()
public ARXLattice.ARXNode getTransformation()
public abstract java.lang.String getValue(int row,
int col)
row - The cell's row indexcol - The cell's column indexpublic DataHandle getView()
public boolean isOptimized()
public boolean isOutlier(int row)
row - the rowpublic boolean isReleased()
public boolean isSuppressed(int row)
row - the rowpublic abstract java.util.Iterator<java.lang.String[]> iterator()
public void release()
public ElementData render()
public boolean replace(int column,
java.lang.String original,
java.lang.String replacement)
column - the columnoriginal - the originalreplacement - the replacementpublic void save(java.io.File file)
throws java.io.IOException
file - the filejava.io.IOException - Signals that an I/O exception has occurred.public void save(java.io.File file,
char separator)
throws java.io.IOException
file - A fileseparator - The utilized separator characterjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.io.File file,
CSVSyntax config)
throws java.io.IOException
file - the fileconfig - the configjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.io.OutputStream out)
throws java.io.IOException
out - the outjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.io.OutputStream out,
char separator)
throws java.io.IOException
out - Output streamseparator - The utilized separator characterjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.io.OutputStream out,
CSVSyntax config)
throws java.io.IOException
out - the outconfig - the configjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.lang.String path)
throws java.io.IOException
path - the pathjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.lang.String path,
char separator)
throws java.io.IOException
path - A pathseparator - The utilized separator characterjava.io.IOException - Signals that an I/O exception has occurred.public void save(java.lang.String path,
CSVSyntax config)
throws java.io.IOException
path - the pathconfig - the configjava.io.IOException - Signals that an I/O exception has occurred.public void sort(boolean ascending,
int... columns)
ascending - Sort ascending or descending
columns - An integer array containing column indices
public void sort(int from,
int to,
boolean ascending,
int... columns)
from - The lower bound
to - The upper bound
ascending - Sort ascending or descending
columns - An integer array containing column indices
public void sort(cern.colt.Swapper swapper,
boolean ascending,
int... columns)
swapper - A swapper
ascending - Sort ascending or descending
columns - An integer array containing column indices
public void sort(cern.colt.Swapper swapper,
int from,
int to,
boolean ascending,
int... columns)
swapper - A swapper
from - The lower bound
to - The upper bound
ascending - Sort ascending or descending
columns - An integer array containing column indices
public void swap(int row1,
int row2)
row1 - the row1row2 - the row2protected void checkColumn(int column1)
column1 - the column1protected void checkColumns(int[] columns)
columns - the columnsprotected void checkReleased()
protected void checkRow(int row1,
int length)
row1 - the row1length - the lengthprotected abstract void doRelease()
protected DataType<?> getBaseDataType(java.lang.String attribute)
attribute - the attributeprotected abstract DataType<?>[] getColumnToDataType()
protected abstract ARXConfiguration getConfiguration()
protected abstract java.lang.String[] getDistinctValues(int column,
boolean ignoreSuppression,
DataHandleInternal.InterruptHandler handler)
column - the columnignoreSuppression - handler - the handlerprotected org.deidentifier.arx.DataRegistry getRegistry()
protected abstract int getValueIdentifier(int column,
java.lang.String value)
column - value - protected int internalCompare(int row1,
int row2,
int[] columns,
boolean ascending)
row1 - the row1row2 - the row2columns - the columnsascending - the ascendingprotected abstract int internalGetEncodedValue(int row,
int col,
boolean ignoreSuppression)
row - the rowcol - the colprotected abstract java.lang.String internalGetValue(int row,
int col,
boolean ignoreSuppression)
row - the rowcol - the colprotected abstract boolean internalIsOutlier(int row,
int[] columns)
true will be returned.row - columns - protected abstract boolean internalReplace(int column,
java.lang.String original,
java.lang.String replacement)
column - the columnoriginal - the originalreplacement - the replacementprotected boolean isAnonymous()
protected void setHeader(java.lang.String[] header)
header - protected void setRegistry(org.deidentifier.arx.DataRegistry registry)
registry - the new registryprotected void setView(DataHandle handle)
handle - the new view