Collections singletonList add multiple values

Java Developer Manual¶

The following guide provides step-by-step instructions to get started writing Java applications using Kinetica. This guide demonstrates only a small subset of the available API. A detailed description of the complete interface is available under Java API Reference.

  • Prerequisites
    • Data File
    • API Download
      • Maven Download
      • Manual Download
    • File Hierarchy
  • Connecting to the Database
  • Creating a Type
  • Creating a Table
  • Inserting Data
    • Key/Value Record
    • In-Line Record
    • CSV
  • Retrieving Data
  • Updating and Removing Records
  • Alter Table
    • Indexes
    • Compression
    • Dictionary Encoding
  • Filters
    • Filter Example 1
    • Filter Example 2
    • Filter Example 3
    • Filter Example 4
  • Aggregates
    • Aggregate Example 1
    • Aggregate Example 2
    • Aggregate Example 3
    • Aggregate Example 4
  • Joins
  • Projections
  • Union, Intersect, and Except
  • Deleting Records
  • Download & Run

Prerequisites¶

Data File¶

The tutorial Java file references a data file in the current directory. This path can be updated to point to a valid path on the host where the file will be located, or the script can be run with the data file in the current directory.

Scanner scanner = new Scanner[new File["taxi_trip_data.csv"]];

API Download¶

Maven Download¶

We suggest using Maven as the build tool for your Java project. To use the Kinetica Java API, you must add our Nexus repository and the Kinetica Java API dependency whose version matches that of the targeted Kinetica database, as shown below:

[7.0.0.0,7.1.0.0-SNAPSHOT]
gpudb-releases //files.kinetica.com/nexus/content/repositories/releases/
com.gpudb gpudb-api ${gpudb-api.version} jar

Important

The pom.xml file used for the tutorial can be found below.

Manual Download¶

The source code for the Java API is also available for download from the GitHub repository kineticadb/kinetica-api-java. Follow the instructions in the included README file to build the API library.

File Hierarchy¶

The tutorial was set up as shown in the diagram below:

java_tutorial/
├── docsite-tutorial-2.0-jar-with-dependencies.jar
├── java_tutorial.out
├── pom.xml
└── src/
    └── main/
        └── java/
            └── Tutorial.java

Connecting to the Database¶

To connect to the database, instantiate an object of the GPUdb class, providing the connection URL, including host & port of the database server:

GPUdb gpudb = new GPUdb["//localhost:9191"];

Creating a Type¶

Before any data can be loaded into the system, a Type needs to be defined in the system. The type is a class, extended from RecordObject, using annotations to describe which class instance variables are fields (i.e., columns), what type they are, and any special handling they should receive. Each field consists of a name and a data type:

public static class Vendor extends RecordObject { /* Create column[s], establish its ordering, give it property * sub-type[s], give it a column type, and give it a name. */ @RecordObject.Column[order = 0, properties = { "char4", "primary_key" }] public String vendor_id; @RecordObject.Column[order = 1, properties = { "char64" }] public String vendor_name; @RecordObject.Column[order = 2, properties = { "char16", "nullable" }] public String phone; @RecordObject.Column[order = 3, properties = { "char64", "nullable" }] public String email; @RecordObject.Column[order = 4, properties = { "char64" }] public String hq_street; @RecordObject.Column[order = 5, properties = { "char8", "dict" }] public String hq_city; @RecordObject.Column[order = 6, properties = { "char2", "dict" }] public String hq_state; @RecordObject.Column[order = 7] public Integer hq_zip; @RecordObject.Column[order = 8] public Integer num_emps; @RecordObject.Column[order = 9] public Integer num_cabs; public Vendor[] {} /* Create a constructor for the class that will take parameters so that * Bulk Inserting is easier */ public Vendor[ String vendor_id, String vendor_name, String phone, String email, String hq_street, String hq_city, String hq_state, Integer hq_zip, Integer num_emps, Integer num_cabs ] { this.vendor_id = vendor_id; this.vendor_name = vendor_name; this.phone = phone; this.email = email; this.hq_street = hq_street; this.hq_city = hq_city; this.hq_state = hq_state; this.hq_zip = hq_zip; this.num_emps = num_emps; this.num_cabs = num_cabs; } } public static class Payment extends RecordObject { @RecordObject.Column[order = 0, properties = { "primary_key" }] public long payment_id; @RecordObject.Column[order = 1, properties = { "char16", "nullable" }] public String payment_type; @RecordObject.Column[order = 2, properties = { "char16", "nullable" }] public String credit_type; @RecordObject.Column[order = 3, properties = { "timestamp", "nullable" }] public Long payment_timestamp; 
@RecordObject.Column[order = 4, properties = { "nullable" }] public double fare_amount; @RecordObject.Column[order = 5, properties = { "nullable" }] public double surcharge; @RecordObject.Column[order = 6, properties = { "nullable" }] public double mta_tax; @RecordObject.Column[order = 7, properties = { "nullable" }] public double tip_amount; @RecordObject.Column[order = 8, properties = { "nullable" }] public double tolls_amount; @RecordObject.Column[order = 9, properties = { "nullable" }] public double total_amount; public Payment[] {} public Payment[ long payment_id, String payment_type, String credit_type, Long payment_timestamp, double fare_amount, double surcharge, double mta_tax, double tip_amount, double tolls_amount, double total_amount ] { this.payment_id = payment_id; this.payment_type = payment_type; this.credit_type = credit_type; this.payment_timestamp = payment_timestamp; this.fare_amount = fare_amount; this.surcharge = surcharge; this.mta_tax = mta_tax; this.tip_amount = tip_amount; this.tolls_amount = tolls_amount; this.total_amount = total_amount; } } public static class TaxiTripData extends RecordObject { @RecordObject.Column[order = 0, properties = { "primary_key" }] public long transaction_id; @RecordObject.Column[order = 1, properties = { "primary_key", "shard_key"}] public long payment_id; @RecordObject.Column[order = 2, properties = { "char4" }] public String vendor_id; @RecordObject.Column[order = 3, properties = { "timestamp" }] public long pickup_datetime; @RecordObject.Column[order = 4, properties = { "timestamp" }] public long dropoff_datetime; @RecordObject.Column[order = 5, properties = { "int8" }] public int passenger_count; @RecordObject.Column[order = 6] public float trip_distance; @RecordObject.Column[order = 7] public float pickup_longitude; @RecordObject.Column[order = 8] public float pickup_latitude; @RecordObject.Column[order = 9] public float dropoff_longitude; @RecordObject.Column[order = 10] public float dropoff_latitude; 
public TaxiTripData[] {} }

Note

Although a constructor is not required, if the class does have any constructors, it must have a constructor with no parameters. Other constructors can be added, as necessary.

Next, the types need to be created:

String vendorTypeId = RecordObject.createType[Vendor.class, gpudb]; String paymentTypeId = RecordObject.createType[Payment.class, gpudb]; String taxiTypeId = RecordObject.createType[TaxiTripData.class, gpudb];

Creating a Table¶

The returned object from the createType() call contains a unique type identifier allocated by the system. This identifier can then be used in the request to create a new table. The examples below outline creating a table with either an options map (Vendor) or options object (Payment):

// Create the Vendor table using an options map Map optionCollectionReplicated = GPUdb.options[ CreateTableRequest.Options.COLLECTION_NAME, COLLECTION, CreateTableRequest.Options.IS_REPLICATED, "true" ]; gpudb.createTable[ TABLE_VENDOR, vendorTypeId, optionCollectionReplicated ];
// Create the Payment table using the options object gpudb.createTable[ TABLE_PAYMENT, paymentTypeId, GPUdb.options[ CreateTableRequest.Options.COLLECTION_NAME, COLLECTION ] ];
gpudb.createTable[ TABLE_TAXI, taxiTypeId, GPUdb.options[ CreateTableRequest.Options.COLLECTION_NAME, COLLECTION ] ];

Inserting Data¶

Once the table is created, data can be inserted into it. There is a convenience class called BulkInserter, which facilitates inserting records into a table in batches, documented under Multi-Head Ingest. For this tutorial, only the native Java API call insertRecords() will be shown.

Key/Value Record¶

// Create a record object and assign values to properties Payment paymentDatum = new Payment[]; paymentDatum.payment_id = 189; paymentDatum.payment_type = "No Charge"; paymentDatum.credit_type = null; paymentDatum.payment_timestamp = null; paymentDatum.fare_amount = 6.5; paymentDatum.surcharge = 0; paymentDatum.mta_tax = 0.6; paymentDatum.tip_amount = 0; paymentDatum.tolls_amount = 0; paymentDatum.total_amount = 7.1; // Insert the record into the table int numInserted = gpudb.insertRecords[ TABLE_PAYMENT, Collections.singletonList[paymentDatum], null ].getCountInserted[]; System.out.println[ "Number of records inserted into the Payment table: " + numInserted ];

In-Line Record¶

/* Create a list of in-line records. The order of the values must match * the column order in the type */ List vendorRecords = new ArrayList[]; vendorRecords.add[ new Vendor[ "VTS","Vine Taxi Service","9998880001", "","26 Summit St.","Flushing","NY", 11354,450,400 ]]; vendorRecords.add[ new Vendor[ "YCAB","Yes Cab","7895444321",null,"97 Edgemont St.", "Brooklyn","NY",11223,445,425 ]]; vendorRecords.add[ new Vendor[ "NYC","New York City Cabs",null,"", "9669 East Bayport St.","Bronx","NY",10453,505,500 ]]; vendorRecords.add[ new Vendor[ "DDS","Dependable Driver Service",null,null, "8554 North Homestead St.","Bronx","NY",10472,200,124 ]]; vendorRecords.add[ new Vendor[ "CMT","Crazy Manhattan Taxi","9778896500", "","950 4th Road Suite 78", "Brooklyn","NY",11210,500,468 ]]; vendorRecords.add[ new Vendor[ "TNY","Taxi New York",null,null,"725 Squaw Creek St.", "Bronx","NY",10458,315,305 ]]; vendorRecords.add[ new Vendor[ "NYMT","New York Metro Taxi",null,null, "4 East Jennings St.","Brooklyn","NY",11228,166,150 ]]; vendorRecords.add[ new Vendor[ "5BTC","Five Boroughs Taxi Co.","4566541278", "","9128 Lantern Street","Brooklyn","NY", 11229,193,175 ]]; // Insert the records into the Vendor table numInserted = gpudb.insertRecords[ TABLE_VENDOR, vendorRecords, null ].getCountInserted[]; System.out.println[ "Number of records inserted into the Vendor table: " + numInserted ];

Important

Additional records are inserted at this point, which can be found in the full Tutorial.java file below.

CSV¶

This example requires the util and io libraries but allows for importing a large number of records with ease. After setting up a Scanner and File instance, you can loop over all lines in a .csv file, convert each line into a record, append the record to a list, then insert the list.

try { Scanner scanner = new Scanner[new File["taxi_trip_data.csv"]]; List taxiRecords = new ArrayList[]; scanner.nextLine[]; while [scanner.hasNextLine[]] { String[] record = scanner.nextLine[].split[",", -1]; TaxiTripData taxiRecord = new TaxiTripData[]; taxiRecord.transaction_id = Long.parseLong[record[0]]; taxiRecord.payment_id = Long.parseLong[record[1]]; taxiRecord.vendor_id = record[2]; taxiRecord.pickup_datetime = Long.parseLong[record[3]]; taxiRecord.dropoff_datetime = Long.parseLong[record[4]]; taxiRecord.passenger_count = Integer.parseInt[record[5]]; taxiRecord.trip_distance = Float.parseFloat[record[6]]; taxiRecord.pickup_longitude = Float.parseFloat[record[7]]; taxiRecord.pickup_latitude = Float.parseFloat[record[8]]; taxiRecord.dropoff_longitude = Float.parseFloat[record[9]]; taxiRecord.dropoff_latitude = Float.parseFloat[record[10]]; taxiRecords.add[taxiRecord]; } numInserted = gpudb.insertRecords[ TABLE_TAXI, taxiRecords, null ].getCountInserted[]; System.out.println[ "Number of records inserted into the Taxi table: " + numInserted ]; } catch [FileNotFoundException e] { e.printStackTrace[]; }

Retrieving Data¶

Once the table is populated with data, the data can be retrieved from the system by a call to getRecords(tableName, offset, limit, options) using in-line parameter-passing.

// Retrieve no more than 10 records from payments using in-line request parameters GetRecordsResponse getPaymentRecordsResp = gpudb.getRecords[ TABLE_PAYMENT, 0, 10, GPUdb.options[GetRecordsRequest.Options.SORT_BY,"payment_id"] ]; System.out.println[ "Payment ID Payment Type Credit Type Payment Timestamp " + "Fare Amount Surcharge MTA Tax Tip Amount Tolls Amount Total Amount" ]; System.out.println[ "========== ============ =========== ================= " + "=========== ========= ======= ========== ============ ============" ]; for [Payment p : getPaymentRecordsResp.getData[]] System.out.printf[ "%10d %-12s %-11s %17d %11.2f %9.2f %7.2f %10.2f %12.2f %12.2f %n", p.payment_id, p.payment_type, p.credit_type, p.payment_timestamp, p.fare_amount, p.surcharge, p.mta_tax, p.tip_amount, p.tolls_amount, p.total_amount ];

One can also invoke getRecords(request) using the GetRecordsRequest request class. This object contains all the parameters for the method call, and can be reused in successive calls, avoiding re-specifying query parameters.

// Retrieve all records from the Vendor table using a request object GetRecordsRequest vendorReq = new GetRecordsRequest[]; vendorReq.setTableName[TABLE_VENDOR]; vendorReq.setOffset[0]; vendorReq.setLimit[GPUdb.END_OF_SET]; vendorReq.setOptions[ GPUdb.options[GetRecordsRequest.Options.SORT_BY, "vendor_id"] ]; GetRecordsResponse vendorResp = gpudb.getRecords[vendorReq]; System.out.println[ "Vendor ID Vendor Name Phone Email " + "HQ Street HQ City HQ State HQ Zip " + "# Employees # Cabs" ]; System.out.println[ "========= ========================== =========== ============================= " + "======================== ======== ======== ====== " + "=========== ======"]; for [Vendor v : vendorResp.getData[]] System.out.printf[ "%-9s %-26s %-11s %-29s " + "%-24s %-8s %-8s %-6d " + "%11d %6d%n", v.vendor_id, v.vendor_name, v.phone, v.email, v.hq_street, v.hq_city, v.hq_state, v.hq_zip, v.num_emps, v.num_cabs ];

For large tables, the data can easily be retrieved in smaller blocks by using the offset and limit parameters. The returned response also contains the schema (or data type) of the results.

Also, note that all query-related methods have the above two versions—one taking a request object and one taking the parameters passed directly to the method.

Updating and Removing Records¶

Use updateRecords() to update matching key values for all records in a table.

// Update the e-mail, number of employees, and number of cabs of the DDS vendor List newValsList = new ArrayList[]; Map newVals = new HashMap[]; newVals.put["email", "''"]; newVals.put["num_emps", "num_emps + 2"]; newVals.put["num_cabs", "num_cabs + 1"]; newValsList.add[newVals]; gpudb.updateRecords[ TABLE_VENDOR, Collections.singletonList["vendor_id = 'DDS'"], newValsList, null, GPUdb.options[ UpdateRecordsRequest.Options.USE_EXPRESSIONS_IN_NEW_VALUES_MAPS, UpdateRecordsRequest.Options.TRUE ] ];

Use deleteRecords() to delete records from a table. A list can be used to specify which records to delete based on matching expressions.

String delExpr = "payment_id = 189"; System.out.println["Deleting record where " + delExpr]; gpudb.deleteRecords[ TABLE_PAYMENT, Collections.singletonList[delExpr], null ];

Alter Table¶

Some properties can be altered or added after table creation, including indexes, dictionary encoding, and compression.

Indexes¶

Using the alterTable method, you can create indexes on columns using the create_index action paired with a column name.

/* Add column indexes on: * - payment table, fare_amount [for query-chaining filter example] * - taxi table, passenger_count [for filter-by-range example] */ gpudb.alterTable[ TABLE_PAYMENT, "create_index", "fare_amount", null ]; gpudb.alterTable[ TABLE_TAXI, "create_index", "passenger_count", null ];

Compression¶

Applying column compression works similarly: using the alterTable method but with a set_column_compression action paired with a column name and compression type option.

/* Apply the snappy compression algorithm to the pickup and dropoff * datetime columns */ Map snappyComp = GPUdb.options[ AlterTableRequest.Options.COMPRESSION_TYPE, "snappy" ]; gpudb.alterTable[ TABLE_TAXI, "set_column_compression", "pickup_datetime", snappyComp ]; gpudb.alterTable[ TABLE_TAXI, "set_column_compression", "dropoff_datetime", snappyComp ];

Important

Column compression is applied at a fixed interval, so be sure to verify later that the compression has been added. Column usage should decrease by roughly 23% (~1989 bytes).

Dictionary Encoding¶

Applying dictionary encoding via alterTable involves adding a new property to a column.

// Apply dictionary encoding to the payment type column AlterTableResponse dictEncResp = gpudb.alterTable[ TABLE_TAXI, AlterTableRequest.Action.CHANGE_COLUMN, columnName, GPUdb.options[ AlterTableRequest.Options.COLUMN_PROPERTIES, "char4,dict" ] ];

Important

To add a new property, all existing column properties must be listed along with any new property.

Filters¶

Filters are an easy way to reduce larger tables into more concise views using expressions.

Filter Example 1¶

// Selects all payments with no corresponding payment type Map optionCollection = GPUdb.options[ "collection_name", COLLECTION ]; long f1Count = gpudb.filter[ TABLE_PAYMENT, VIEW_EXAMPLE1, "IS_NULL[payment_type]", optionCollection ].getCount[]; System.out.println["Number of null payments: " + f1Count];

Filter Example 2¶

// Using query chaining, filter null payment type records with a fare amount greater than 8 long f2Count = gpudb.filter[ VIEW_EXAMPLE1, VIEW_EXAMPLE2, "fare_amount > 8", optionCollection ].getCount[]; System.out.println[ "Number of null payments with a fare amount greater than $8.00 " + "[with query chaining]: " + f2Count ];

Filter Example 3¶

// Filter by list where vendor ID is either NYC or YCAB Map columnValuesMap = new HashMap[]; columnValuesMap.put["vendor_id", Arrays.asList["NYC", "YCAB"]]; long f3Count = gpudb.filterByList[ TABLE_TAXI, VIEW_EXAMPLE3, columnValuesMap, null ].getCount[]; System.out.println[ "Number of records where vendor_id is either NYC or YCAB: " + f3Count ];

Filter Example 4¶

// Filter by range trip with passenger count between 1 and 3 long f4Count = gpudb.filterByRange[ TABLE_TAXI, VIEW_EXAMPLE4, "passenger_count", 1, 3, null ].getCount[]; System.out.println[ "Number of trips with passenger_count between 1 and 3: " + f4Count ];

Aggregates¶

Kinetica supports various aggregate and group-by queries, which group and aggregate your data to return counts and useful statistics.

Aggregate Example 1¶

// Aggregate count, min, mean, and max on the trip distance Map a1Resp = gpudb.aggregateStatistics[ TABLE_TAXI, "trip_distance", AggregateStatisticsRequest.Stats.COUNT + "," + AggregateStatisticsRequest.Stats.MIN + "," + AggregateStatisticsRequest.Stats.MAX + "," + AggregateStatisticsRequest.Stats.MEAN, null ].getStats[]; System.out.println["Statistics of values in the trip_distance column:"]; System.out.printf[ "\tCount: %.0f%n\tMin: %4.2f%n\tMean: %4.2f%n\tMax: %4.2f%n%n", a1Resp.get[AggregateStatisticsRequest.Stats.COUNT], a1Resp.get[AggregateStatisticsRequest.Stats.MIN], a1Resp.get[AggregateStatisticsRequest.Stats.MEAN], a1Resp.get[AggregateStatisticsRequest.Stats.MAX] ];

Aggregate Example 2¶

// Find unique taxi vendor IDs List a2Resp = gpudb.aggregateUnique[ TABLE_TAXI, "vendor_id", 0, GPUdb.END_OF_SET, null ].getData[]; System.out.println["Unique vendor IDs in the taxi trip table:"]; for [Record vendor : a2Resp] System.out.println["\t* " + vendor.get["vendor_id"]];

Aggregate Example 3¶

// Find number of trips per vendor List colNames = Arrays.asList["vendor_id", "count[vendor_id]"]; List a3Resp = gpudb.aggregateGroupBy[ TABLE_TAXI, colNames, 0, GPUdb.END_OF_SET, GPUdb.options[ AggregateGroupByRequest.Options.SORT_BY, AggregateGroupByRequest.Options.KEY ] ].getData[]; System.out.println["Trips per vendor:"]; for [Record vendor : a3Resp] System.out.printf[ "\t%-5s %3d%n", vendor.get["vendor_id"] + ":", vendor.get["count[vendor_id]"] ];

Aggregate Example 4¶

// Create a histogram for the different groups of passenger counts float start = 1; float end = 6; float interval = 1; List a4Resp = gpudb.aggregateHistogram[ TABLE_TAXI, "passenger_count", start, end, interval, null ].getCounts[]; System.out.println["Passenger count groups by size:"]; System.out.println["Passengers Total Trips"]; System.out.println["========== ==========="]; List countGroups = Arrays.asList["1", "2", "3", "4", ">5"]; for [int hgNum = 0; hgNum

Chủ Đề