2025-01-31 18:42:39 +00:00
// Copyright 2025, Command Line Inc.
// SPDX-License-Identifier: Apache-2.0
// Package tarcopy provides functions for copying files over a channel via a tar stream.
package tarcopy
import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"io/fs"
"log"
"path/filepath"
"strings"
"github.com/wavetermdev/waveterm/pkg/util/iochan"
"github.com/wavetermdev/waveterm/pkg/util/iochan/iochantypes"
2025-02-15 01:27:02 +00:00
"github.com/wavetermdev/waveterm/pkg/util/utilfn"
2025-01-31 18:42:39 +00:00
"github.com/wavetermdev/waveterm/pkg/wshrpc"
)
// Identifiers used by the tar copy helpers in this package.
const (
// tarCopySrcName and tarCopyDestName are passed to utilfn.GracefulClose by
// TarCopySrc and TarCopyDest respectively (presumably to label which side
// is closing a resource — confirm against utilfn.GracefulClose).
tarCopySrcName = "TarCopySrc"
tarCopyDestName = "TarCopyDest"
// pipeReaderName, pipeWriterName and tarWriterName are passed to
// utilfn.GracefulClose alongside the resource being closed (presumably as
// a human-readable resource name — confirm against utilfn.GracefulClose).
pipeReaderName = "pipe reader"
pipeWriterName = "pipe writer"
tarWriterName = "tar writer"
2025-02-15 01:27:02 +00:00
// SingleFile is the key of the custom PAX record that marks a tar stream as
// carrying a single file. TarCopySrc sets it to "true" on the header when
// its singleFile argument is set, and TarCopyDest reports an entry as a
// single file when the record has that value.
SingleFile = "singlefile"
2025-01-31 18:42:39 +00:00
)
// TarCopySrc creates a tar stream writer and returns a channel to send the tar stream to.
2025-02-15 01:27:02 +00:00
// writeHeader is a function that writes the tar header for the file. If only a single file is being written, the singleFile flag should be set to true.
2025-01-31 18:42:39 +00:00
// writer is the tar writer to write the file data to.
// close is a function that closes the tar writer and internal pipe writer.
2025-02-15 01:27:02 +00:00
func TarCopySrc ( ctx context . Context , pathPrefix string ) ( outputChan chan wshrpc . RespOrErrorUnion [ iochantypes . Packet ] , writeHeader func ( fi fs . FileInfo , file string , singleFile bool ) error , writer io . Writer , close func ( ) ) {
2025-01-31 18:42:39 +00:00
pipeReader , pipeWriter := io . Pipe ( )
tarWriter := tar . NewWriter ( pipeWriter )
rtnChan := iochan . ReaderChan ( ctx , pipeReader , wshrpc . FileChunkSize , func ( ) {
2025-02-15 01:27:02 +00:00
log . Printf ( "Closing pipe reader\n" )
utilfn . GracefulClose ( pipeReader , tarCopySrcName , pipeReaderName )
2025-01-31 18:42:39 +00:00
} )
2025-02-15 01:27:02 +00:00
singleFileFlagSet := false
return rtnChan , func ( fi fs . FileInfo , path string , singleFile bool ) error {
2025-01-31 18:42:39 +00:00
// generate tar header
2025-02-15 01:27:02 +00:00
header , err := tar . FileInfoHeader ( fi , path )
2025-01-31 18:42:39 +00:00
if err != nil {
return err
}
2025-02-15 01:27:02 +00:00
if singleFile {
if singleFileFlagSet {
return errors . New ( "attempting to write multiple files to a single file tar stream" )
}
header . PAXRecords = map [ string ] string { SingleFile : "true" }
singleFileFlagSet = true
}
path , err = fixPath ( path , pathPrefix )
if err != nil {
2025-01-31 18:42:39 +00:00
return err
}
2025-02-15 01:27:02 +00:00
// skip if path is empty, which means the file is the root directory
if path == "" {
return nil
}
header . Name = path
2025-01-31 18:42:39 +00:00
// write header
if err := tarWriter . WriteHeader ( header ) ; err != nil {
return err
}
return nil
} , tarWriter , func ( ) {
2025-02-15 01:27:02 +00:00
log . Printf ( "Closing tar writer\n" )
utilfn . GracefulClose ( tarWriter , tarCopySrcName , tarWriterName )
utilfn . GracefulClose ( pipeWriter , tarCopySrcName , pipeWriterName )
2025-01-31 18:42:39 +00:00
}
}
2025-02-15 01:27:02 +00:00
// fixPath converts path into the relative name stored in a tar header.
//
// The prefix is stripped from the front of path, the remainder is normalized
// with filepath.Clean, and then one leading "/" followed by one leading "\"
// is removed. A remainder consisting only of a separator therefore yields "",
// while an empty remainder (path equal to prefix) yields "." because that is
// what filepath.Clean returns for the empty string.
//
// An error is returned when any segment of the result is exactly "..", since
// such a name could escape the destination directory. Names that merely
// contain two consecutive dots (for example "notes..txt") are accepted.
func fixPath(path, prefix string) (string, error) {
	cleaned := filepath.Clean(strings.TrimPrefix(path, prefix))
	cleaned = strings.TrimPrefix(cleaned, "/")
	cleaned = strings.TrimPrefix(cleaned, "\\")

	// Compare whole segments instead of searching for the substring "..", so
	// that ordinary file names containing ".." are not rejected. Both slash
	// styles are treated as separators because the stream may be produced or
	// consumed on either kind of platform.
	isSeparator := func(r rune) bool { return r == '/' || r == '\\' }
	for _, segment := range strings.FieldsFunc(cleaned, isSeparator) {
		if segment == ".." {
			return "", fmt.Errorf("invalid tar path containing directory traversal: %s", cleaned)
		}
	}
	return cleaned, nil
}
// TarCopyDest reads a tar stream from a channel and writes the files to the destination.
2025-02-15 01:27:02 +00:00
// readNext is a function that is called for each file in the tar stream to read the file data. If only a single file is being written from the tar src, the singleFile flag will be set in this callback. It should return an error if the file cannot be read.
2025-01-31 18:42:39 +00:00
// The function returns an error if the tar stream cannot be read.
2025-02-15 01:27:02 +00:00
func TarCopyDest ( ctx context . Context , cancel context . CancelCauseFunc , ch <- chan wshrpc . RespOrErrorUnion [ iochantypes . Packet ] , readNext func ( next * tar . Header , reader * tar . Reader , singleFile bool ) error ) error {
2025-01-31 18:42:39 +00:00
pipeReader , pipeWriter := io . Pipe ( )
iochan . WriterChan ( ctx , pipeWriter , ch , func ( ) {
2025-02-15 01:27:02 +00:00
utilfn . GracefulClose ( pipeWriter , tarCopyDestName , pipeWriterName )
2025-01-31 18:42:39 +00:00
} , cancel )
tarReader := tar . NewReader ( pipeReader )
defer func ( ) {
2025-02-15 01:27:02 +00:00
if ! utilfn . GracefulClose ( pipeReader , tarCopyDestName , pipeReaderName ) {
2025-01-31 18:42:39 +00:00
// If the pipe reader cannot be closed, cancel the context. This should kill the writer goroutine.
cancel ( nil )
}
} ( )
for {
select {
case <- ctx . Done ( ) :
if ctx . Err ( ) != nil {
return context . Cause ( ctx )
}
return nil
default :
next , err := tarReader . Next ( )
if err != nil {
// Do one more check for context error before returning
if ctx . Err ( ) != nil {
return context . Cause ( ctx )
}
if errors . Is ( err , io . EOF ) {
return nil
} else {
return err
}
}
2025-02-15 01:27:02 +00:00
// Check for directory traversal
if strings . Contains ( next . Name , ".." ) {
return fmt . Errorf ( "invalid tar path containing directory traversal: %s" , next . Name )
}
err = readNext ( next , tarReader , next . PAXRecords != nil && next . PAXRecords [ SingleFile ] == "true" )
2025-01-31 18:42:39 +00:00
if err != nil {
return err
}
}
}
}