Monday, 12 March 2018

Partitioned External Tables in Oracle Database 12c Realease 2 (12.2)

Partitioned external tables were introduced in Oracle Database 12c Release 2 (12.2), allowing external tables to benefit from partition pruning and partition-wise joins. With the exception of hash partitioning, many partitioning and subpartitioning strategies are supported with some restrictions.

Oracle Database Tutorials and Materials, Oracle Database Certifications, Oracle Database Learning

◈ Setup


In order to demonstrate an external table we need some data in flat files. The following code spools out four CSV files with 1000 rows each.

CONN / AS SYSDBA
ALTER SESSION SET CONTAINER = pdb1;

SET MARKUP CSV ON QUOTE ON
SET TRIMSPOOL ON LINESIZE 1000 FEEDBACK OFF PAGESIZE 0

SPOOL /tmp/gbr1.txt
SELECT 'GBR',
       object_id,
       owner,
       object_name
FROM   dba_objects
WHERE  object_id <= 2000
AND    rownum <= 1000;
SPOOL OFF

SPOOL /tmp/gbr2.txt
SELECT 'GBR',
       object_id,
       owner,
       object_name
FROM   dba_objects
WHERE  object_id BETWEEN 2000 AND 3999
AND    rownum <= 1000;
SPOOL OFF

SPOOL /tmp/ire1.txt
SELECT 'IRE',
       object_id,
       owner,
       object_name
FROM   dba_objects
WHERE  object_id <= 2000
AND    rownum <= 1000;
SPOOL OFF

SPOOL /tmp/ire2.txt
SELECT 'IRE',
       object_id,
       owner,
       object_name
FROM   dba_objects
WHERE  object_id BETWEEN 2000 AND 3999
AND    rownum <= 1000;
SPOOL OFF

SET MARKUP CSV OFF
SET TRIMSPOOL ON LINESIZE 1000 FEEDBACK OFF PAGESIZE 14

Create two directory objects to access these files. In this case both are pointing to the same directory, but it will still allow us to see the syntax.

CONN / AS SYSDBA
ALTER SESSION SET CONTAINER = pdb1;

CREATE OR REPLACE DIRECTORY tmp_dir1 AS '/tmp/';
GRANT READ, WRITE ON DIRECTORY tmp_dir1 TO test;

CREATE OR REPLACE DIRECTORY tmp_dir2 AS '/tmp/';
GRANT READ, WRITE ON DIRECTORY tmp_dir2 TO test;

◈ Partitioned External Tables


The following example creates a list partitioned external table based on the CSV files we created previously. Each partition can have a separate location definition, which can optionally include a directory definition. If a location is not defined the partition will be seen as empty.

CONN test/test@pdb1

DROP TABLE part_tab_ext;

CREATE TABLE part_tab_ext (
  country_code  VARCHAR2(3),
  object_id     NUMBER,
  owner         VARCHAR2(128),
  object_name   VARCHAR2(128)
)
ORGANIZATION EXTERNAL (
  TYPE oracle_loader
  DEFAULT DIRECTORY tmp_dir1
  ACCESS PARAMETERS (
    FIELDS CSV WITH EMBEDDED TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    MISSING FIELD VALUES ARE NULL (
      country_code,
      object_id,
      owner,
      object_name
    )
  )
)
REJECT LIMIT UNLIMITED
PARTITION BY LIST (country_code) (
  PARTITION part_gbr    VALUES ('GBR') LOCATION ('gbr1.txt', 'gbr2.txt'),
  PARTITION part_usa    VALUES ('IRE') DEFAULT DIRECTORY tmp_dir2 LOCATION ('ire1.txt', 'ire2.txt'),
  PARTITION part_others VALUES ('XXX')
);

Querying the external table shows the files are being read correctly.

SELECT country_code, COUNT(*)
FROM   part_tab_ext
GROUP BY country_code
ORDER BY country_code;

COU   COUNT(*)
--- ----------
GBR   2000
IRE   2000

SQL>

If we gather statistics we can see how the rows were spread between the partitions.

EXEC DBMS_STATS.gather_table_stats(USER, 'part_tab_ext');

SET LINESIZE 120
COLUMN table_name FORMAT A20
COLUMN partition_name FORMAT A20
COLUMN high_value FORMAT A20

SELECT table_name,
       partition_name,
       high_value,
       num_rows
FROM   user_tab_partitions
WHERE  table_name = 'PART_TAB_EXT'
ORDER BY 1, 2;

TABLE_NAME  PARTITION_NAME HIGH_VALUE NUM_ROWS
-------------------- -------------------- -------------------- ----------
PART_TAB_EXT      PART_GBR   'GBR'      2000
PART_TAB_EXT      PART_OTHERS   'XXX' 0
PART_TAB_EXT      PART_USA   'IRE'      2000

SQL>

◈ Subpartitioned External Tables


The following example creates a list-range subpartitioned external table based on the CSV files we created previously. Each subpartition can have a separate location definition, which can optionally include a directory definition at partition or subpartition level.

CONN test/test@pdb1

DROP TABLE subpart_tab_ext;

CREATE TABLE subpart_tab_ext (
  country_code  VARCHAR2(3),
  object_id     NUMBER,
  owner         VARCHAR2(128),
  object_name   VARCHAR2(128)
)
ORGANIZATION EXTERNAL (
  TYPE oracle_loader
  DEFAULT DIRECTORY tmp_dir1
  ACCESS PARAMETERS (
    FIELDS CSV WITH EMBEDDED TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    MISSING FIELD VALUES ARE NULL (
      country_code,
      object_id,
      owner,
      object_name
    )
  )
)
REJECT LIMIT UNLIMITED
PARTITION BY LIST (country_code)
SUBPARTITION BY RANGE (object_id) (
  PARTITION part_gbr VALUES ('GBR') (
    SUBPARTITION subpart_gbr_le_2000 VALUES LESS THAN (2000) LOCATION ('gbr1.txt'),
    SUBPARTITION subpart_gbr_gt_2000 VALUES LESS THAN (MAXVALUE) DEFAULT DIRECTORY tmp_dir2 LOCATION ('gbr2.txt')
  ),
  PARTITION part_ire VALUES ('IRE') DEFAULT DIRECTORY tmp_dir2 (
    SUBPARTITION subpart_ire_le_2000 VALUES LESS THAN (2000) LOCATION ('ire1.txt'),
    SUBPARTITION subpart_ire_gt_2000 VALUES LESS THAN (MAXVALUE) LOCATION ('ire2.txt')
  )
);

Querying the external table shows the files are being read correctly.

SELECT country_code, COUNT(*)
FROM   subpart_tab_ext
GROUP BY country_code
ORDER BY country_code;

COU   COUNT(*)
---   ----------
GBR   2000
IRE   2000

SQL>

If we gather statistics we can see how the rows were spread between the partitions.

EXEC DBMS_STATS.gather_table_stats(USER, 'subpart_tab_ext');

SET LINESIZE 120
COLUMN table_name FORMAT A20
COLUMN partition_name FORMAT A20
COLUMN subpartition_name FORMAT A20
COLUMN high_value FORMAT A20

SELECT table_name,
       partition_name,
       subpartition_name,
       high_value,
       num_rows
FROM   user_tab_subpartitions
WHERE  table_name = 'SUBPART_TAB_EXT'
ORDER BY 1, 2;

TABLE_NAME PARTITION_NAME  SUBPARTITION_NAME  HIGH_VALUE  NUM_ROWS
SUBPART_TAB_EXT PART_GBR SUBPART_GBR_LE_2000 2000 1000
SUBPART_TAB_EXT  PART_GBR  SUBPART_GBR_GT_2000  MAXVALUE   1000
SUBPART_TAB_EXT  PART_IRE  SUBPART_IRE_LE_2000  2000   1000
SUBPART_TAB_EXT  PART_IRE  SUBPART_IRE_GT_2000  MAXVALUE   1000

SQL>