python - Is there a faster way of reading in GML files than this R code? -
I've got 11,000 GML files totalling 0.5 TB when uncompressed. I've written code in R to do the job, but the files take 10 minutes each. The code reads each file, takes out the building polygons, and saves them as an RDS file; the RDS file is smaller and contains only what I need, which makes it quicker to read in on future uses.
Any suggestions for different ways of writing the code would be appreciated, or is there a different language that is better at this kind of stuff - Python, for example? My current approach is to run the same code in multiple R sessions.
Here's the code:
# Extract the building polygons from a tree of gzipped GML files and cache
# them as .rds files so later runs can load them without re-parsing the GML.
#
# Input layout : <os_in_dir>/<folder>/<file>.gz
# Output       : <os_out_dir>/<file>.rds, written only when the file contains
#                at least one row whose `theme1` attribute is "buildings".
#
# `logevent()` is not defined in this script; it must already be in scope.

library(rgdal)

os_in_dir <- "d:/rhome/os_topo"
os_out_dir <- "d:/rhome/os_topo_out"

# Restart point: in folder number `resume_folder`, files before index
# `resume_file` are skipped. Every other folder is processed from file 1.
resume_folder <- 23L
resume_file <- 95L

# Get the folders that hold the input files.
folders <- list.files(path = os_in_dir)

# seq_along() (rather than 1:length()) makes an empty listing a no-op instead
# of iterating over c(1, 0).
for (i in seq_along(folders)) {
  in_dir <- file.path(os_in_dir, folders[i])

  # list.files() takes a regular expression, not a glob, so match a literal
  # ".gz" suffix; the same anchored pattern swaps the suffix for ".rds".
  files <- list.files(path = in_dir, pattern = "\\.gz$")
  o_files <- sub("\\.gz$", ".rds", files)

  logevent(paste(
    "processing folder:", folders[i], "-", length(files), "files process."
  ))

  first_j <- if (i == resume_folder) resume_file else 1L
  todo <- seq_along(files)
  todo <- todo[todo >= first_j]

  for (j in todo) {
    logevent(paste("processing file:", files[j]))

    # Read the polygon layer. A failed read is logged, together with the
    # underlying error message, and the file is skipped.
    # NOTE(review): the layer name is kept exactly as found; confirm its
    # capitalisation against the data (e.g. with ogrListLayers()).
    ta <- tryCatch(
      readOGR(file.path(in_dir, files[j]), layer = "topographicarea"),
      error = function(e) {
        logevent(paste("error on file:", files[j], "-", conditionMessage(e)))
        NULL
      }
    )
    if (is.null(ta)) next

    o_rc <- nrow(ta)

    # %in% never returns NA, so rows with a missing theme1 are treated as
    # non-buildings instead of putting NA into the row index.
    # NOTE(review): the value "buildings" is kept exactly as found; confirm
    # its capitalisation against the data.
    is_building <- ta@data$theme1 %in% "buildings"
    b_rc <- sum(is_building)

    # Guard the percentage against an empty layer (0 / 0 would give NaN).
    pct_b <- if (o_rc > 0) round((b_rc / o_rc) * 100) else 0

    logevent(paste(
      "done file:", files[j], "-", b_rc, "out of", o_rc,
      "were buildings (", pct_b, "% )"
    ))

    if (b_rc > 0) {
      logevent(paste("saving file", files[j], "as", o_files[j]))
      # Subset only when there is something to keep, then save as .rds.
      saveRDS(ta[is_building, ], file.path(os_out_dir, o_files[j]))
    }

    # Drop the (potentially very large) layer before reading the next file.
    rm(ta)
    gc()
  }
}
Comments
Post a Comment