A tool for backing up ATProto related data to S3

initial commit with a working PDS repo and blob backup

Signed-off-by: Will Andrews <will7989@hotmail.com>

willdot.net 2b30b6c9

+300
+6
.env.example
··· 1 + ENDPOINT="S3-endpoint" 2 + ACCESS_ID="S3-ID" 3 + SECRET_ACCESS_KEY="S3-secret" 4 + BUCKET_NAME="my-super-duper-bucket" 5 + DID="the-did-to-backup" 6 + PDS_HOST="https://your-pds.com"
+1
.gitignore
··· 1 + .env
+27
go.mod
··· 1 + module tangled.sh/willdot.net/backatit 2 + 3 + go 1.25.0 4 + 5 + require ( 6 + github.com/joho/godotenv v1.5.1 7 + github.com/minio/minio-go/v7 v7.0.95 8 + ) 9 + 10 + require ( 11 + github.com/dustin/go-humanize v1.0.1 // indirect 12 + github.com/go-ini/ini v1.67.0 // indirect 13 + github.com/goccy/go-json v0.10.5 // indirect 14 + github.com/google/uuid v1.6.0 // indirect 15 + github.com/klauspost/compress v1.18.0 // indirect 16 + github.com/klauspost/cpuid/v2 v2.2.11 // indirect 17 + github.com/minio/crc64nvme v1.0.2 // indirect 18 + github.com/minio/md5-simd v1.1.2 // indirect 19 + github.com/philhofer/fwd v1.2.0 // indirect 20 + github.com/rs/xid v1.6.0 // indirect 21 + github.com/stretchr/testify v1.10.0 // indirect 22 + github.com/tinylib/msgp v1.3.0 // indirect 23 + golang.org/x/crypto v0.39.0 // indirect 24 + golang.org/x/net v0.41.0 // indirect 25 + golang.org/x/sys v0.33.0 // indirect 26 + golang.org/x/text v0.26.0 // indirect 27 + )
+43
go.sum
··· 1 + github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 + github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 + github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 4 + github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 5 + github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= 6 + github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= 7 + github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= 8 + github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= 9 + github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 10 + github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 11 + github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= 12 + github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= 13 + github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 14 + github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 15 + github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 16 + github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU= 17 + github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= 18 + github.com/minio/crc64nvme v1.0.2 h1:6uO1UxGAD+kwqWWp7mBFsi5gAse66C4NXO8cmcVculg= 19 + github.com/minio/crc64nvme v1.0.2/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= 20 + github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= 21 + github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= 22 + github.com/minio/minio-go/v7 v7.0.95 h1:ywOUPg+PebTMTzn9VDsoFJy32ZuARN9zhB+K3IYEvYU= 23 + 
github.com/minio/minio-go/v7 v7.0.95/go.mod h1:wOOX3uxS334vImCNRVyIDdXX9OsXDm89ToynKgqUKlo= 24 + github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= 25 + github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= 26 + github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 27 + github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 28 + github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= 29 + github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= 30 + github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 31 + github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 32 + github.com/tinylib/msgp v1.3.0 h1:ULuf7GPooDaIlbyvgAxBV/FI7ynli6LZ1/nVUNu+0ww= 33 + github.com/tinylib/msgp v1.3.0/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0= 34 + golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= 35 + golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= 36 + golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= 37 + golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= 38 + golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= 39 + golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 40 + golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= 41 + golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= 42 + gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 43 + gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+214
main.go
··· 1 + package main 2 + 3 + import ( 4 + "archive/zip" 5 + "context" 6 + "encoding/json" 7 + "fmt" 8 + "io" 9 + "log/slog" 10 + "net/http" 11 + "os" 12 + 13 + "github.com/joho/godotenv" 14 + "github.com/minio/minio-go/v7" 15 + "github.com/minio/minio-go/v7/pkg/credentials" 16 + ) 17 + 18 + func main() { 19 + ctx := context.Background() 20 + 21 + err := godotenv.Load(".env") 22 + if err != nil { 23 + if !os.IsNotExist(err) { 24 + slog.Error("load env", "error", err) 25 + return 26 + } 27 + } 28 + 29 + minioClient, err := createMinioClient() 30 + if err != nil { 31 + slog.Error("create minio client", "error", err) 32 + return 33 + } 34 + 35 + bucketName := os.Getenv("BUCKET_NAME") 36 + 37 + err = minioClient.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{}) 38 + if err != nil { 39 + slog.Error("create bucket", "error", err) 40 + return 41 + } 42 + 43 + err = backupRepo(ctx, minioClient, bucketName) 44 + if err != nil { 45 + slog.Error("backup repo", "error", err) 46 + return 47 + } 48 + 49 + err = backupBlobs(ctx, minioClient, bucketName) 50 + if err != nil { 51 + slog.Error("backup blobs", "error", err) 52 + return 53 + } 54 + } 55 + 56 + func createMinioClient() (*minio.Client, error) { 57 + endpoint := os.Getenv("ENDPOINT") 58 + accessKeyID := os.Getenv("ACCESS_ID") 59 + secretAccessKey := os.Getenv("SECRET_ACCESS_KEY") 60 + useSSL := true 61 + 62 + return minio.New(endpoint, &minio.Options{ 63 + Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), 64 + Secure: useSSL, 65 + }) 66 + } 67 + 68 + func backupRepo(ctx context.Context, minioClient *minio.Client, bucketName string) error { 69 + pdsHost := os.Getenv("PDS_HOST") 70 + did := os.Getenv("DID") 71 + 72 + url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", pdsHost, did) 73 + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 74 + if err != nil { 75 + return fmt.Errorf("create get repo request: %w", err) 76 + } 77 + 78 + req.Header.Add("ACCEPT", "application/vnd.ipld.car") 
79 + resp, err := http.DefaultClient.Do(req) 80 + if err != nil { 81 + return fmt.Errorf("get repo: %w", err) 82 + } 83 + 84 + defer resp.Body.Close() 85 + 86 + _, err = minioClient.PutObject(ctx, bucketName, "pds-repo", resp.Body, -1, minio.PutObjectOptions{}) 87 + if err != nil { 88 + return fmt.Errorf("stream repo to bucket: %w", err) 89 + } 90 + 91 + return nil 92 + } 93 + 94 + func backupBlobs(ctx context.Context, minioClient *minio.Client, bucketName string) error { 95 + cids, err := getAllBlobCIDs(ctx) 96 + if err != nil { 97 + return fmt.Errorf("get all blob CIDs: %w", err) 98 + } 99 + 100 + reader, writer := io.Pipe() 101 + defer reader.Close() 102 + 103 + zipWriter := zip.NewWriter(writer) 104 + 105 + go func() { 106 + defer writer.Close() 107 + defer zipWriter.Close() 108 + 109 + for _, cid := range cids { 110 + slog.Info("processing cid", "cid", cid) 111 + blob, err := getBlob(ctx, cid) 112 + if err != nil { 113 + slog.Error("failed to get blob", "cid", cid, "error", err) 114 + continue 115 + } 116 + 117 + zipFile, err := zipWriter.Create(cid) 118 + if err != nil { 119 + slog.Error("create new file in zipwriter", "cid", cid, "error", err) 120 + blob.Close() 121 + continue 122 + } 123 + 124 + io.Copy(zipFile, blob) 125 + blob.Close() 126 + } 127 + }() 128 + 129 + _, err = minioClient.PutObject(ctx, bucketName, "pds-blobs.zip", reader, -1, minio.PutObjectOptions{}) 130 + if err != nil { 131 + return fmt.Errorf("stream blobs to bucket: %w", err) 132 + } 133 + 134 + return nil 135 + } 136 + 137 + func getAllBlobCIDs(ctx context.Context) ([]string, error) { 138 + cursor := "" 139 + limit := 100 140 + var cids []string 141 + for { 142 + res, err := listBlobs(ctx, cursor, int64(limit)) 143 + if err != nil { 144 + return nil, fmt.Errorf("list blobs: %w", err) 145 + } 146 + if len(res.CIDs) == 0 { 147 + return cids, nil 148 + } 149 + 150 + cids = append(cids, res.CIDs...) 
151 + 152 + if len(res.CIDs) < limit { 153 + return cids, nil 154 + } 155 + 156 + cursor = res.Cursor 157 + } 158 + } 159 + 160 + type listBlobsResponse struct { 161 + Cursor string `json:"cursor"` 162 + CIDs []string `json:"cids"` 163 + } 164 + 165 + func listBlobs(ctx context.Context, cursor string, limit int64) (listBlobsResponse, error) { 166 + pdsHost := os.Getenv("PDS_HOST") 167 + did := os.Getenv("DID") 168 + 169 + // TODO: do proper url encoding of query params 170 + url := fmt.Sprintf("%s/xrpc/com.atproto.sync.listBlobs?did=%s&cursor=%s&limit=%d", pdsHost, did, cursor, limit) 171 + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 172 + if err != nil { 173 + return listBlobsResponse{}, fmt.Errorf("create list blobs request: %w", err) 174 + } 175 + 176 + resp, err := http.DefaultClient.Do(req) 177 + if err != nil { 178 + return listBlobsResponse{}, fmt.Errorf("list blobs: %w", err) 179 + } 180 + 181 + defer resp.Body.Close() 182 + 183 + resBody, err := io.ReadAll(resp.Body) 184 + if err != nil { 185 + return listBlobsResponse{}, fmt.Errorf("failed to read response: %w", err) 186 + } 187 + 188 + var result listBlobsResponse 189 + err = json.Unmarshal(resBody, &result) 190 + if err != nil { 191 + return listBlobsResponse{}, fmt.Errorf("failed to unmarshal response: %w", err) 192 + } 193 + 194 + return result, nil 195 + } 196 + 197 + func getBlob(ctx context.Context, cid string) (io.ReadCloser, error) { 198 + pdsHost := os.Getenv("PDS_HOST") 199 + did := os.Getenv("DID") 200 + 201 + // TODO: do proper url encoding of query params 202 + url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getBlob?did=%s&cid=%s", pdsHost, did, cid) 203 + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 204 + if err != nil { 205 + return nil, fmt.Errorf("create get blob request: %w", err) 206 + } 207 + 208 + resp, err := http.DefaultClient.Do(req) 209 + if err != nil { 210 + return nil, fmt.Errorf("get blob: %w", err) 211 + } 212 + 213 + return resp.Body, nil 214 + 
}
+9
readme.md
··· 1 + ## Back AT it 2 + 3 + This is a tool I'm actively developing to back up my ATProtocol type things to S3 storage. 4 + 5 + At the moment it's a one shot style script that backs up the PDS repo and then the blobs, but in the future I plan on being able to back up other things (next is my Tangled Knot data). 6 + 7 + The PDS repo data is pulled straight from the xrpc endpoint and sent straight to S3. The blob data, however, is streamed into a zip file and sent to S3 so that not all the data is held in memory while the backup takes place (the minio library will still keep some in memory as a multipart request). 8 + 9 + It's very hacky right now and needs polishing, so use with caution. Although let's face it, the worst it can do at the moment is back up some bad data, which is better than no data 🤪