前回までのスクリプトをぐるぐる回すようにしてみました。
一銘柄ずつ順番に取得すると時間がかかるので、複数プロセスで並列に動くようにしています。
- RFinanceYJPatch.R
# Download the stock master (security code and name) from the Yahoo! Finance
# Japan kana index pages.
#
# Returns a data.frame with character columns `code` and `name`, ordered by
# `code`. Stops with an error when an index page cannot be fetched or parsed.
# The XML helpers used here (htmlTreeParse, xmlRoot, xpathApply, xmlSize,
# xmlValue) are not loaded by this file; the caller must attach them
# (getall.sh does so through library(RFinanceYJ)).
quoteStockMasterTsData <- function() {
  # Collect every index page for one leading kana `hira`.
  function.stockMasterData <- function(hira) {
    # A page with fewer than this many issues is treated as the last page.
    page.size <- 20
    pages <- list()
    start.num <- 0
    repeat {
      start.num <- start.num + 1
      quote.url <- paste0(
        "http://stocks.finance.yahoo.co.jp/stocks/qi/?js=", hira,
        "&p=", start.num
      )
      # Evaluate the fetch result per request so that a failed download is
      # never mistaken for the previously parsed page.
      r <- tryCatch(
        xmlRoot(htmlTreeParse(
          quote.url,
          error = xmlErrorCumulator(immediate = FALSE)
        )),
        error = function(e) NULL
      )
      if (is.null(r)) stop(paste("Can not access :", quote.url))
      quote.table <- tryCatch(
        xpathApply(r, "//a[contains(@href,'/stocks/detail')]"),
        error = function(e) NULL
      )
      if (is.null(quote.table)) {
        # Nothing collected yet for this kana: report it instead of silently
        # returning an empty result.
        if (length(pages) == 0) stop(paste("Can not quote :", hira))
        break
      }
      # The matched links come in groups of three per issue; the first link
      # of a group holds the code and the second holds the name.
      size <- xmlSize(quote.table) %/% 3
      if (size == 0) break
      link.text <- function(i) {
        value <- xmlValue(quote.table[[i]])
        if (length(value) == 0) NA_character_ else as.character(value)[1]
      }
      idx <- seq_len(size)
      pages[[length(pages) + 1]] <- data.frame(
        code = vapply(idx * 3 - 2, link.text, character(1)),
        name = vapply(idx * 3 - 1, link.text, character(1)),
        stringsAsFactors = FALSE
      )
      # Throttle: wait one second after every page that was processed.
      Sys.sleep(1)
      if (size < page.size) break
    }
    if (length(pages) == 0) return(data.frame(NULL))
    do.call(rbind, pages)
  }

  # Leading kana to query. The ra-row (らりるれろ) was missing from the
  # original list, which skipped every issue whose reading starts with it.
  hiraList <- "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわ"
  per.kana <- lapply(strsplit(hiraList, "")[[1]], function.stockMasterData)
  per.kana <- Filter(function(d) nrow(d) > 0, per.kana)
  if (length(per.kana) == 0) return(data.frame(NULL))
  financial.data <- do.call(rbind, per.kana)
  financial.data[order(financial.data$code), ]
}

# API

# Quote the price history of a stock.
#
# x:             security code, pasted into the history URL
# since:         start date "yyyy-MM-dd", or NULL for no lower bound
# start.num:     number of leading result pages to skip (0 = start at page 1)
# date.end:      end date "yyyy-MM-dd", or NULL for no upper bound
# time.interval: 'daily', 'weekly' or 'monthly' (only the first letter is used)
#
# Returns a data.frame with columns date, open, high, low, close, volume and
# adj_close, ordered by date. Rows with fewer than five cells are skipped;
# volume and adj_close are 0 when the row does not carry them.
quoteStockTsData <- function(x, since = NULL, start.num = 0, date.end = NULL,
                             time.interval = 'daily') {
  time.interval <- substr(time.interval, 1, 1)
  function.stock <- function(quote.table.item) {
    cells <- xmlSize(quote.table.item)
    if (cells < 5) return(NULL)
    cell <- function(i) as.number(xmlValue(quote.table.item[[i]]))
    data.frame(
      date = convertToDate(xmlValue(quote.table.item[[1]]), time.interval),
      open = cell(2),
      high = cell(3),
      low = cell(4),
      close = cell(5),
      volume = if (cells >= 6) cell(6) else 0,
      adj_close = if (cells >= 7) cell(7) else 0
    )
  }
  quoteTsData(x, function.stock, since, start.num, date.end, time.interval,
              type = "stock")
}

# Quote the price history of a fund. Arguments are the same as for
# quoteStockTsData.
#
# Returns a data.frame with columns date, constant.value and NAV, ordered by
# date. For monthly data the date is moved to the last day of the month.
quoteFundTsData <- function(x, since = NULL, start.num = 0, date.end = NULL,
                            time.interval = 'daily') {
  time.interval <- substr(time.interval, 1, 1)
  function.fund <- function(quote.table.item) {
    d <- convertToDate(xmlValue(quote.table.item[[1]]), time.interval)
    # time.interval has already been cut down to its first letter, so the
    # monthly case has to be tested against 'm' (the original compared with
    # 'monthly' and therefore never took this branch).
    if (time.interval == 'm') {
      d <- endOfMonth(d)
    }
    data.frame(
      date = d,
      constant.value = as.number(xmlValue(quote.table.item[[2]])),
      NAV = as.number(xmlValue(quote.table.item[[3]]))
    )
  }
  quoteTsData(x, function.fund, since, start.num, date.end, time.interval,
              type = "fund")
}

# Quote the history of an FX rate. Arguments are the same as for
# quoteStockTsData.
#
# Returns a data.frame with columns date, open, high, low and close, ordered
# by date.
quoteFXTsData <- function(x, since = NULL, start.num = 0, date.end = NULL,
                          time.interval = 'daily') {
  time.interval <- substr(time.interval, 1, 1)
  function.fx <- function(quote.table.item) {
    cell <- function(i) as.number(xmlValue(quote.table.item[[i]]))
    data.frame(
      date = convertToDate(xmlValue(quote.table.item[[1]]), time.interval),
      open = cell(2),
      high = cell(3),
      low = cell(4),
      close = cell(5)
    )
  }
  quoteTsData(x, function.fx, since, start.num, date.end, time.interval,
              type = "fx")
}

###### private functions #####
# get time series data from Yahoo! Finance.
# Download a paged price-history table from Yahoo! Finance Japan and convert
# each row with `function.financialproduct`.
#
# x:                         product code, pasted into the history URL
# function.financialproduct: function(row node) returning a one-row data.frame
#                            with a `date` column, or NULL to skip the row
# since, date.end:           "yyyy-MM-dd" strings or NULL; turned into the
#                            sy/sm/sd and ey/em/ed query parameters
# start.num:                 number of leading pages to skip
# time.interval:             one of 'd', 'w', 'm'
# type:                      kept for backward compatibility; not used
#
# Returns the converted rows as one data.frame ordered by `date` (an empty
# data.frame when the table has no data rows). Stops when a page cannot be
# fetched, or when the very first page has no history table.
quoteTsData <- function(x, function.financialproduct, since, start.num,
                        date.end, time.interval, type = "stock") {
  if (!isTRUE(time.interval %in% c('d', 'w', 'm'))) {
    stop("Invalid time.interval value")
  }
  # A table with fewer nodes than this is treated as the last page.
  full.page.size <- 51
  start <- gsub("([0-9]{4,4})-([0-9]{2,2})-([0-9]{2,2})",
                "&sy=\\1&sm=\\2&sd=\\3", since)
  end <- gsub("([0-9]{4,4})-([0-9]{2,2})-([0-9]{2,2})",
              "&ey=\\1&em=\\2&ed=\\3", date.end)

  rows <- list()
  repeat {
    start.num <- start.num + 1
    quote.url <- paste0(
      'http://info.finance.yahoo.co.jp/history/?code=', x, start, end,
      '&p=', start.num, '&tm=', time.interval
    )
    # Evaluate the fetch result per request: the original kept the previous
    # page in `r` when a later request failed and re-parsed it.
    r <- tryCatch(
      xmlRoot(htmlTreeParse(
        quote.url,
        error = xmlErrorCumulator(immediate = FALSE)
      )),
      error = function(e) NULL
    )
    if (is.null(r)) stop(paste("Can not access :", quote.url))
    # The history table is taken to be the second <table> of the page.
    quote.table <- tryCatch(
      xpathApply(r, "//table")[[2]],
      error = function(e) NULL
    )
    if (is.null(quote.table)) {
      if (length(rows) == 0) stop(paste("Can not quote :", x))
      break
    }
    size <- xmlSize(quote.table)
    # Child 1 is skipped (header row); seq_len()[-1] is empty for size < 2,
    # whereas 2:size would count down and index past the table.
    items <- lapply(seq_len(size)[-1], function(i) {
      function.financialproduct(quote.table[[i]])
    })
    rows <- c(rows, Filter(Negate(is.null), items))
    # Throttle: wait one second after every page that was processed.
    Sys.sleep(1)
    if (size < full.page.size) break
  }

  if (length(rows) == 0) return(data.frame(NULL))
  financial.data <- do.call(rbind, rows)
  financial.data[order(financial.data$date), ]
}

# Convert a date string from the history table to a POSIXct object.
#
# For 'd' and 'w' a leading year / month / day (each followed by non-digit
# separators) is rewritten to "Y-M-D"; for 'm' a leading year / month is
# rewritten to "Y-M-01". Any other interval is an error.
convertToDate <- function(date.string, time.interval) {
  # the date format differs between monthly and daily or weekly data
  if (any(time.interval == c('d', 'w'))) {
    result <- gsub(
      "^([0-9]{4})([^0-9]+)([0-9]{1,2})([^0-9]+)([0-9]{1,2})([^0-9]+)",
      "\\1-\\3-\\5", date.string
    )
  } else if (any(time.interval == 'm')) {
    result <- gsub("^([0-9]{4})([^0-9]+)([0-9]{1,2})([^0-9]+)",
                   "\\1-\\3-01", date.string)
  } else {
    stop("Invalid time.interval value")
  }
  as.POSIXct(result)
}

# Convert a string to a number.
# Every character other than a digit or '.' is removed first, so thousands
# separators and units disappear (and so does a minus sign). Strings without
# any digit yield NA.
as.number <- function(string) {
  as.double(gsub("[^0-9.]", "", string))
}

# Return the last day of the month that contains `date.obj`, as a Date.
endOfMonth <- function(date.obj) {
  startOfMonth <- as.Date(format(date.obj, "%Y%m01"), "%Y%m%d")
  # first day + 31 always lands in the following month
  startOfNextMonth <- as.Date(format(startOfMonth + 31, "%Y%m01"), "%Y%m%d")
  startOfNextMonth - 1
}

library(RSQLite)

# Download the price history of every code in `codes` and store it in the
# table `historicalData` of the SQLite file `sqliteFile`.
#
# sqliteFile: path of the SQLite database file
# codes:      vector of security codes
# fromDate:   start date, yyyy-MM-dd
#
# The table is dropped and recreated on every call. A failure for one code is
# reported with message() and does not stop the remaining codes. Called for
# its side effects; returns NULL invisibly.
getStockHistoricalData <- function(sqliteFile, codes, fromDate) {
  con <- DBI::dbConnect(RSQLite::SQLite(), dbname = sqliteFile)
  # Close the connection on every exit path.
  on.exit(DBI::dbDisconnect(con), add = TRUE)

  DBI::dbExecute(con, "drop table if exists historicalData")
  DBI::dbExecute(con, "create table historicalData (date text,open real,high real, low real,close real,volume real,adj_close real,code text)")
  DBI::dbExecute(con, "create unique index idx_historicalData on historicalData(code,date)")

  columns <- c("date", "open", "high", "low", "close", "volume",
               "adj_close", "code")
  for (code in codes) {
    print(paste("code=", code, sep = ""))
    tryCatch({
      data <- quoteStockTsData(code, fromDate)
      if (nrow(data) > 0) {
        data$code <- rep(as.character(code), nrow(data))
        data$date <- as.character(data$date)
        # Delete and insert atomically; the transaction is rolled back when
        # either statement fails. Values are bound as parameters rather than
        # pasted into the SQL text.
        DBI::dbWithTransaction(con, {
          DBI::dbExecute(
            con, "delete from historicalData where code = :code",
            params = list(code = as.character(code))
          )
          DBI::dbExecute(
            con,
            "insert into historicalData (date,open,high,low,close,volume,adj_close,code) values(:date, :open, :high, :low, :close, :volume, :adj_close, :code)",
            params = as.list(data[columns])
          )
        })
      }
    }, error = function(e) {
      message(paste("ERROR:", code))
      message(conditionMessage(e))
    })
  }
  invisible(NULL)
}
- getall.sh
#!/bin/sh
# Download the price history of all listed stocks in parallel.
# Each worker handles one index range of the stock master and writes its
# result to its own SQLite file (stock1.db, stock2.db, ...).

# getStockData FROM TO FILE_NO
#   FROM, TO  first and last index (1-based, inclusive) into the stock master
#   FILE_NO   suffix of the output file stock<FILE_NO>.db
getStockData(){
  fromNo=$1
  toNo=$2
  fileNo=$3
  # The here-document is expanded by the shell: $fromNo, $toNo and $fileNo are
  # substituted, while \$ keeps a literal $ for R.
  /usr/bin/R --vanilla << Eof
library(RFinanceYJ)
source("/Users/utsuboka/Documents/prog/RFinanceYJPatch.R")
stockMaster<-quoteStockMasterTsData()
codes<-stockMaster\$code[$fromNo:$toNo]
# Indexes past the end of the master yield NA; drop them instead of trying
# to download a code called "NA".
codes<-codes[!is.na(codes)]
getStockHistoricalData("stock$fileNo.db",codes,"2000-01-01")
Eof
}

getStockData 1 500 1 &
getStockData 501 1000 2 &
getStockData 1001 1500 3 &
getStockData 1501 2000 4 &
getStockData 2001 2500 5 &
getStockData 2501 3000 6 &
getStockData 3001 3500 7 &
getStockData 3501 4000 8 &

# Block until every background worker has finished, so that the script only
# returns once all stock*.db files are complete.
wait
現在の銘柄数は4000弱なので、この分割で全銘柄をカバーできると思います。これでもまだかなり時間はかかりますが、全銘柄を取得できるはずです。