99import java .util .Arrays ;
1010import java .util .Collections ;
1111import java .util .List ;
12+ import java .util .Optional ;
13+ import java .util .regex .Pattern ;
1214import java .util .stream .Collectors ;
1315
1416import sqlancer .Randomly ;
@@ -32,6 +34,9 @@ public DataFusionSchema(List<DataFusionTable> databaseTables) {
3234
3335 // update existing tables in DB by query again
3436 // (like `show tables;`)
37+ //
38+ // This function also sets up table<->column reference pointers
39+ // and equivalent tables (see `DataFusionTable.equivalentTables`)
3540 public static DataFusionSchema fromConnection (SQLConnection con , String databaseName ) throws SQLException {
3641 List <DataFusionTable > databaseTables = new ArrayList <>();
3742 List <String > tableNames = getTableNames (con );
@@ -47,6 +52,24 @@ public static DataFusionSchema fromConnection(SQLConnection con, String database
4752 databaseTables .add (t );
4853 }
4954
55+ // Setup equivalent tables
56+ // For example, now we have t1, t1_csv, t1_parquet, t2_csv, t2_parquet
57+ // t1's equivalent tables: t1, t1_csv, t1_parquet
58+ // t2_csv's equivalent tables: t2_csv, t2_parquet
59+ // ...
60+ //
61+ // It can be assumed that:
62+ // base table names are like t1, t2, ...
63+ // equivalent tables are like t1_csv, t1_parquet, ...
64+ for (DataFusionTable t : databaseTables ) {
65+ String baseTableName = t .getName ().split ("_" )[0 ];
66+ String patternString = "^" + baseTableName + "(_.*)?$" ; // t1 or t1_*
67+ Pattern pattern = Pattern .compile (patternString );
68+
69+ t .equivalentTables = databaseTables .stream ().filter (table -> pattern .matcher (table .getName ()).matches ())
70+ .map (DataFusionTable ::getName ).collect (Collectors .toList ());
71+ }
72+
5073 return new DataFusionSchema (databaseTables );
5174 }
5275
@@ -120,8 +143,10 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) {
120143 return DataFusionDataType .BOOLEAN ;
121144 case "Utf8" :
122145 return DataFusionDataType .STRING ;
146+ case "Utf8View" :
147+ return DataFusionDataType .STRING ;
123148 default :
124- dfAssert (false , "Unreachable. All branches should be eovered" );
149+ dfAssert (false , "Uncovered branch typeString: " + typeString );
125150 }
126151
127152 dfAssert (false , "Unreachable. All branches should be eovered" );
@@ -169,25 +194,89 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
169194 public static class DataFusionColumn extends AbstractTableColumn <DataFusionTable , DataFusionDataType > {
170195
171196 private final boolean isNullable ;
197+ public Optional <String > alias ;
172198
173199 public DataFusionColumn (String name , DataFusionDataType columnType , boolean isNullable ) {
174200 super (name , null , columnType );
175201 this .isNullable = isNullable ;
202+ this .alias = Optional .empty ();
176203 }
177204
178205 public boolean isNullable () {
179206 return isNullable ;
180207 }
181208
209+ public String getOrignalName () {
210+ return getTable ().getName () + "." + getName ();
211+ }
212+
213+ @ Override
214+ public String getFullQualifiedName () {
215+ if (getTable () == null ) {
216+ return getName ();
217+ } else {
218+ if (alias .isPresent ()) {
219+ return alias .get ();
220+ } else {
221+ return getTable ().getName () + "." + getName ();
222+ }
223+ }
224+ }
182225 }
183226
184227 public static class DataFusionTable
185228 extends AbstractRelationalTable <DataFusionColumn , TableIndex , DataFusionGlobalState > {
229+ // There might exist multiple logically equivalent tables with
230+ // different physical format.
231+ // e.g. t1_csv, t1_parquet, ...
232+ //
233+ // When generating random query, it's possible to randomly pick one
234+ // of them for stronger randomization.
235+ public List <String > equivalentTables ;
236+
237+ // Randomly picked equivalent table name (set by `pickAnotherEquivalentTableName()`)
238+ // This can be used when generating differential queries
239+ public Optional <String > currentEquivalentTableName ;
240+
241+ // For example in query `select * from t1 as tt1, t1 as tt2`
242+ // `tt1` is the alias for the first occurrence of `t1`
243+ public Optional <String > alias ;
186244
/**
 * Creates a table with no indexes (an empty index list is passed to the super constructor).
 *
 * @param tableName
 *            name of the table
 * @param columns
 *            columns of the table
 * @param isView
 *            forwarded unchanged to the super constructor
 */
public DataFusionTable(String tableName, List<DataFusionColumn> columns, boolean isView) {
    super(tableName, columns, Collections.emptyList(), isView);
    // Start without an alias so that getName() can safely call alias.isPresent().
    // `equivalentTables` and `currentEquivalentTableName` are intentionally left null:
    // getName() treats null as "equivalent tables are not set up yet".
    this.alias = Optional.empty();
}
190248
249+ public String getNotAliasedName () {
250+ if (currentEquivalentTableName != null && currentEquivalentTableName .isPresent ()) {
251+ // In case setup is not done yet
252+ return currentEquivalentTableName .get ();
253+ } else {
254+ return super .getName ();
255+ }
256+ }
257+
258+ // TODO(datafusion) Now implementation is hacky, should send a patch
259+ // to core to support this
260+ @ Override
261+ public String getName () {
262+ // Before setup equivalent tables, we use the original table name
263+ // Setup happens in `fromConnection()`
264+ if (equivalentTables == null || currentEquivalentTableName == null ) {
265+ return super .getName ();
266+ }
267+
268+ if (alias .isPresent ()) {
269+ return alias .get ();
270+ } else {
271+ return currentEquivalentTableName .get ();
272+ }
273+ }
274+
275+ public void pickAnotherEquivalentTableName () {
276+ dfAssert (!equivalentTables .isEmpty (), "equivalentTables should not be empty" );
277+ currentEquivalentTableName = Optional .of (Randomly .fromList (equivalentTables ));
278+ }
279+
191280 public static List <DataFusionColumn > getAllColumns (List <DataFusionTable > tables ) {
192281 return tables .stream ().map (AbstractTable ::getColumns ).flatMap (List ::stream ).collect (Collectors .toList ());
193282 }
0 commit comments