merge {SparkR}    R Documentation

Merges two data frames

Description

Merges two data frames

Usage

## S4 method for signature 'SparkDataFrame,SparkDataFrame'
merge(x, y, by = intersect(names(x),
  names(y)), by.x = by, by.y = by, all = FALSE, all.x = all,
  all.y = all, sort = TRUE, suffixes = c("_x", "_y"), ...)

merge(x, y, ...)

Arguments

x

the first data frame to be joined.

y

the second data frame to be joined.

by

a character vector specifying the join columns. If by is not specified, the common column names in x and y will be used. If by, or both by.x and by.y, are explicitly set to NULL or have length 0, the Cartesian product of x and y will be returned.

by.x

a character vector specifying the joining columns for x.

by.y

a character vector specifying the joining columns for y.

all

a boolean value used as the default for all.x and all.y when either of them is not set explicitly.

all.x

a boolean value indicating whether all the rows in x should be included in the join.

all.y

a boolean value indicating whether all the rows in y should be included in the join.

sort

a logical argument indicating whether the resulting columns should be sorted.

suffixes

a string vector of length 2 used to make colnames of x and y unique. The first element is appended to each colname of x. The second element is appended to each colname of y.

...

additional argument(s) passed to the method.

Details

If all.x and all.y are set to FALSE, a natural join will be returned. If all.x is set to TRUE and all.y is set to FALSE, a left outer join will be returned. If all.x is set to FALSE and all.y is set to TRUE, a right outer join will be returned. If all.x and all.y are set to TRUE, a full outer join will be returned.

Note

merge since 1.5.0

See Also

join crossJoin

Other SparkDataFrame functions: $, $,SparkDataFrame-method, $<-, $<-,SparkDataFrame-method, select, select, select,SparkDataFrame,Column-method, select,SparkDataFrame,character-method, select,SparkDataFrame,list-method; SparkDataFrame-class; [, [,SparkDataFrame-method, [[, [[,SparkDataFrame,numericOrcharacter-method, [[<-, [[<-,SparkDataFrame,numericOrcharacter-method, subset, subset, subset,SparkDataFrame-method; agg, agg, agg, agg,GroupedData-method, agg,SparkDataFrame-method, summarize, summarize, summarize, summarize,GroupedData-method, summarize,SparkDataFrame-method; alias, alias, alias, alias,Column-method, alias,SparkDataFrame-method; arrange, arrange, arrange, arrange,SparkDataFrame,Column-method, arrange,SparkDataFrame,character-method, orderBy,SparkDataFrame,characterOrColumn-method; as.data.frame, as.data.frame,SparkDataFrame-method; attach, attach,SparkDataFrame-method; broadcast, broadcast, broadcast,SparkDataFrame-method; cache, cache, cache,SparkDataFrame-method; checkpoint, checkpoint, checkpoint,SparkDataFrame-method; coalesce, coalesce, coalesce,SparkDataFrame-method; collect, collect, collect,SparkDataFrame-method; colnames, colnames, colnames,SparkDataFrame-method, colnames<-, colnames<-, colnames<-,SparkDataFrame-method, columns, columns, columns,SparkDataFrame-method, names, names,SparkDataFrame-method, names<-, names<-,SparkDataFrame-method; coltypes, coltypes, coltypes,SparkDataFrame-method, coltypes<-, coltypes<-, coltypes<-,SparkDataFrame,character-method; count,SparkDataFrame-method, nrow, nrow, nrow,SparkDataFrame-method; createOrReplaceTempView, createOrReplaceTempView, createOrReplaceTempView,SparkDataFrame,character-method; crossJoin, crossJoin,SparkDataFrame,SparkDataFrame-method; cube, cube, cube,SparkDataFrame-method; dapplyCollect, dapplyCollect, dapplyCollect,SparkDataFrame,function-method; dapply, dapply, dapply,SparkDataFrame,function,characterOrstructType-method; describe, describe, describe, 
describe,SparkDataFrame,ANY-method, describe,SparkDataFrame,character-method, describe,SparkDataFrame-method; dim, dim,SparkDataFrame-method; distinct, distinct, distinct,SparkDataFrame-method, unique, unique,SparkDataFrame-method; dropDuplicates, dropDuplicates, dropDuplicates,SparkDataFrame-method; dropna, dropna, dropna,SparkDataFrame-method, fillna, fillna, fillna,SparkDataFrame-method, na.omit, na.omit, na.omit,SparkDataFrame-method; drop, drop, drop, drop,ANY-method, drop,SparkDataFrame-method; dtypes, dtypes, dtypes,SparkDataFrame-method; except, except, except,SparkDataFrame,SparkDataFrame-method; explain, explain, explain, explain,SparkDataFrame-method, explain,StreamingQuery-method; filter, filter, filter,SparkDataFrame,characterOrColumn-method, where, where, where,SparkDataFrame,characterOrColumn-method; first, first, first, first,SparkDataFrame-method, first,characterOrColumn-method; gapplyCollect, gapplyCollect, gapplyCollect, gapplyCollect,GroupedData-method, gapplyCollect,SparkDataFrame-method; gapply, gapply, gapply, gapply,GroupedData-method, gapply,SparkDataFrame-method; getNumPartitions, getNumPartitions,SparkDataFrame-method; groupBy, groupBy, groupBy,SparkDataFrame-method, group_by, group_by, group_by,SparkDataFrame-method; head, head,SparkDataFrame-method; hint, hint, hint,SparkDataFrame,character-method; histogram, histogram,SparkDataFrame,characterOrColumn-method; insertInto, insertInto, insertInto,SparkDataFrame,character-method; intersect, intersect, intersect,SparkDataFrame,SparkDataFrame-method; isLocal, isLocal, isLocal,SparkDataFrame-method; isStreaming, isStreaming, isStreaming,SparkDataFrame-method; join, join,SparkDataFrame,SparkDataFrame-method; limit, limit, limit,SparkDataFrame,numeric-method; mutate, mutate, mutate,SparkDataFrame-method, transform, transform, transform,SparkDataFrame-method; ncol, ncol,SparkDataFrame-method; persist, persist, persist,SparkDataFrame,character-method; printSchema, printSchema, 
printSchema,SparkDataFrame-method; randomSplit, randomSplit, randomSplit,SparkDataFrame,numeric-method; rbind, rbind, rbind,SparkDataFrame-method; registerTempTable, registerTempTable, registerTempTable,SparkDataFrame,character-method; rename, rename, rename,SparkDataFrame-method, withColumnRenamed, withColumnRenamed, withColumnRenamed,SparkDataFrame,character,character-method; repartition, repartition, repartition,SparkDataFrame-method; rollup, rollup, rollup,SparkDataFrame-method; sample, sample, sample,SparkDataFrame-method, sample_frac, sample_frac, sample_frac,SparkDataFrame-method; saveAsParquetFile, saveAsParquetFile, saveAsParquetFile,SparkDataFrame,character-method, write.parquet, write.parquet, write.parquet,SparkDataFrame,character-method; saveAsTable, saveAsTable, saveAsTable,SparkDataFrame,character-method; saveDF, saveDF, saveDF,SparkDataFrame,character-method, write.df, write.df, write.df, write.df,SparkDataFrame-method; schema, schema, schema,SparkDataFrame-method; selectExpr, selectExpr, selectExpr,SparkDataFrame,character-method; showDF, showDF, showDF,SparkDataFrame-method; show, show, show,Column-method, show,GroupedData-method, show,SparkDataFrame-method, show,StreamingQuery-method, show,WindowSpec-method; storageLevel, storageLevel,SparkDataFrame-method; str, str,SparkDataFrame-method; summary, summary, summary,SparkDataFrame-method; take, take, take,SparkDataFrame,numeric-method; toJSON, toJSON,SparkDataFrame-method; unionByName, unionByName, unionByName,SparkDataFrame,SparkDataFrame-method; union, union, union,SparkDataFrame,SparkDataFrame-method, unionAll, unionAll, unionAll,SparkDataFrame,SparkDataFrame-method; unpersist, unpersist, unpersist,SparkDataFrame-method; withColumn, withColumn, withColumn,SparkDataFrame,character-method; with, with,SparkDataFrame-method; write.jdbc, write.jdbc, write.jdbc,SparkDataFrame,character,character-method; write.json, write.json, write.json,SparkDataFrame,character-method; write.orc, write.orc, 
write.orc,SparkDataFrame,character-method; write.stream, write.stream, write.stream,SparkDataFrame-method; write.text, write.text, write.text,SparkDataFrame,character-method

Examples

## Not run: 
##D sparkR.session()
##D df1 <- read.json(path)
##D df2 <- read.json(path2)
##D merge(df1, df2) # Performs an inner join by common columns
##D merge(df1, df2, by = "col1") # Performs an inner join on the column "col1"
##D merge(df1, df2, by.x = "col1", by.y = "col2", all.y = TRUE)
##D merge(df1, df2, by.x = "col1", by.y = "col2", all.x = TRUE)
##D merge(df1, df2, by.x = "col1", by.y = "col2", all.x = TRUE, all.y = TRUE)
##D merge(df1, df2, by.x = "col1", by.y = "col2", all = TRUE, sort = FALSE)
##D merge(df1, df2, by = "col1", all = TRUE, suffixes = c("-X", "-Y"))
##D merge(df1, df2, by = NULL) # Performs a Cartesian join
## End(Not run)

[Package SparkR version 2.3.0 Index]